xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 85913cca8bbe6e2bccfd216a6eac95e37d521a86)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43               cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 
49 static cl::opt<bool>
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51               cl::desc("Do not treat all horizontal whitespace as equivalent"));
52 
53 static cl::list<std::string> ImplicitCheckNot(
54     "implicit-check-not",
55     cl::desc("Add an implicit negative check with this pattern to every\n"
56              "positive check. This can be used to ensure that no instances of\n"
57              "this pattern occur which are not matched by a positive pattern"),
58     cl::value_desc("pattern"));
59 
60 static cl::opt<bool> AllowEmptyInput(
61     "allow-empty", cl::init(false),
62     cl::desc("Allow the input file to be empty. This is useful when making\n"
63              "checks that some error message does not occur, for example."));
64 
65 static cl::opt<bool> MatchFullLines(
66     "match-full-lines", cl::init(false),
67     cl::desc("Require all positive matches to cover an entire input line.\n"
68              "Allows leading and trailing whitespace if --strict-whitespace\n"
69              "is not also passed."));
70 
71 typedef cl::list<std::string>::const_iterator prefix_iterator;
72 
73 //===----------------------------------------------------------------------===//
74 // Pattern Handling Code.
75 //===----------------------------------------------------------------------===//
76 
77 namespace Check {
78   enum CheckType {
79     CheckNone = 0,
80     CheckPlain,
81     CheckNext,
82     CheckSame,
83     CheckNot,
84     CheckDAG,
85     CheckLabel,
86 
87     /// MatchEOF - When set, this pattern only matches the end of file. This is
88     /// used for trailing CHECK-NOTs.
89     CheckEOF
90   };
91 }
92 
93 class Pattern {
94   SMLoc PatternLoc;
95 
96   Check::CheckType CheckTy;
97 
98   /// FixedStr - If non-empty, this pattern is a fixed string match with the
99   /// specified fixed string.
100   StringRef FixedStr;
101 
102   /// RegEx - If non-empty, this is a regex pattern.
103   std::string RegExStr;
104 
105   /// \brief Contains the number of line this pattern is in.
106   unsigned LineNumber;
107 
108   /// VariableUses - Entries in this vector map to uses of a variable in the
109   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
110   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
111   /// value of bar at offset 3.
112   std::vector<std::pair<StringRef, unsigned> > VariableUses;
113 
114   /// VariableDefs - Maps definitions of variables to their parenthesized
115   /// capture numbers.
116   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
117   std::map<StringRef, unsigned> VariableDefs;
118 
119 public:
120 
121   Pattern(Check::CheckType Ty)
122     : CheckTy(Ty) { }
123 
124   /// getLoc - Return the location in source code.
125   SMLoc getLoc() const { return PatternLoc; }
126 
127   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
128   /// which prefix is being matched, SM provides the SourceMgr used for error
129   /// reports, and LineNumber is the line number in the input file from which
130   /// the pattern string was read.  Returns true in case of an error, false
131   /// otherwise.
132   bool ParsePattern(StringRef PatternStr,
133                     StringRef Prefix,
134                     SourceMgr &SM,
135                     unsigned LineNumber);
136 
137   /// Match - Match the pattern string against the input buffer Buffer.  This
138   /// returns the position that is matched or npos if there is no match.  If
139   /// there is a match, the size of the matched string is returned in MatchLen.
140   ///
141   /// The VariableTable StringMap provides the current values of filecheck
142   /// variables and is updated if this match defines new values.
143   size_t Match(StringRef Buffer, size_t &MatchLen,
144                StringMap<StringRef> &VariableTable) const;
145 
146   /// PrintFailureInfo - Print additional information about a failure to match
147   /// involving this pattern.
148   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
149                         const StringMap<StringRef> &VariableTable) const;
150 
151   bool hasVariable() const { return !(VariableUses.empty() &&
152                                       VariableDefs.empty()); }
153 
154   Check::CheckType getCheckTy() const { return CheckTy; }
155 
156 private:
157   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
158   void AddBackrefToRegEx(unsigned BackrefNum);
159 
160   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
161   /// matching this pattern at the start of \arg Buffer; a distance of zero
162   /// should correspond to a perfect match.
163   unsigned ComputeMatchDistance(StringRef Buffer,
164                                const StringMap<StringRef> &VariableTable) const;
165 
166   /// \brief Evaluates expression and stores the result to \p Value.
167   /// \return true on success. false when the expression has invalid syntax.
168   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
169 
170   /// \brief Finds the closing sequence of a regex variable usage or
171   /// definition. Str has to point in the beginning of the definition
172   /// (right after the opening sequence).
173   /// \return offset of the closing sequence within Str, or npos if it was not
174   /// found.
175   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
176 };
177 
178 
179 bool Pattern::ParsePattern(StringRef PatternStr,
180                            StringRef Prefix,
181                            SourceMgr &SM,
182                            unsigned LineNumber) {
183   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
184 
185   this->LineNumber = LineNumber;
186   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
187 
188   // Ignore trailing whitespace.
189   while (!PatternStr.empty() &&
190          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
191     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
192 
193   // Check that there is something on the line.
194   if (PatternStr.empty()) {
195     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
196                     "found empty check string with prefix '" +
197                     Prefix + ":'");
198     return true;
199   }
200 
201   // Check to see if this is a fixed string, or if it has regex pieces.
202   if (!MatchFullLinesHere &&
203       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
204                                  PatternStr.find("[[") == StringRef::npos))) {
205     FixedStr = PatternStr;
206     return false;
207   }
208 
209   if (MatchFullLinesHere) {
210     RegExStr += '^';
211     if (!NoCanonicalizeWhiteSpace)
212       RegExStr += " *";
213   }
214 
215   // Paren value #0 is for the fully matched string.  Any new parenthesized
216   // values add from there.
217   unsigned CurParen = 1;
218 
219   // Otherwise, there is at least one regex piece.  Build up the regex pattern
220   // by escaping scary characters in fixed strings, building up one big regex.
221   while (!PatternStr.empty()) {
222     // RegEx matches.
223     if (PatternStr.startswith("{{")) {
224       // This is the start of a regex match.  Scan for the }}.
225       size_t End = PatternStr.find("}}");
226       if (End == StringRef::npos) {
227         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
228                         SourceMgr::DK_Error,
229                         "found start of regex string with no end '}}'");
230         return true;
231       }
232 
233       // Enclose {{}} patterns in parens just like [[]] even though we're not
234       // capturing the result for any purpose.  This is required in case the
235       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
236       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
237       RegExStr += '(';
238       ++CurParen;
239 
240       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
241         return true;
242       RegExStr += ')';
243 
244       PatternStr = PatternStr.substr(End+2);
245       continue;
246     }
247 
248     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
249     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
250     // second form is [[foo]] which is a reference to foo.  The variable name
251     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
252     // it.  This is to catch some common errors.
253     if (PatternStr.startswith("[[")) {
254       // Find the closing bracket pair ending the match.  End is going to be an
255       // offset relative to the beginning of the match string.
256       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
257 
258       if (End == StringRef::npos) {
259         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
260                         SourceMgr::DK_Error,
261                         "invalid named regex reference, no ]] found");
262         return true;
263       }
264 
265       StringRef MatchStr = PatternStr.substr(2, End);
266       PatternStr = PatternStr.substr(End+4);
267 
268       // Get the regex name (e.g. "foo").
269       size_t NameEnd = MatchStr.find(':');
270       StringRef Name = MatchStr.substr(0, NameEnd);
271 
272       if (Name.empty()) {
273         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
274                         "invalid name in named regex: empty name");
275         return true;
276       }
277 
278       // Verify that the name/expression is well formed. FileCheck currently
279       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
280       // is relaxed, more strict check is performed in \c EvaluateExpression.
281       bool IsExpression = false;
282       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
283         if (i == 0 && Name[i] == '@') {
284           if (NameEnd != StringRef::npos) {
285             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
286                             SourceMgr::DK_Error,
287                             "invalid name in named regex definition");
288             return true;
289           }
290           IsExpression = true;
291           continue;
292         }
293         if (Name[i] != '_' && !isalnum(Name[i]) &&
294             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
295           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
296                           SourceMgr::DK_Error, "invalid name in named regex");
297           return true;
298         }
299       }
300 
301       // Name can't start with a digit.
302       if (isdigit(static_cast<unsigned char>(Name[0]))) {
303         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
304                         "invalid name in named regex");
305         return true;
306       }
307 
308       // Handle [[foo]].
309       if (NameEnd == StringRef::npos) {
310         // Handle variables that were defined earlier on the same line by
311         // emitting a backreference.
312         if (VariableDefs.find(Name) != VariableDefs.end()) {
313           unsigned VarParenNum = VariableDefs[Name];
314           if (VarParenNum < 1 || VarParenNum > 9) {
315             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
316                             SourceMgr::DK_Error,
317                             "Can't back-reference more than 9 variables");
318             return true;
319           }
320           AddBackrefToRegEx(VarParenNum);
321         } else {
322           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
323         }
324         continue;
325       }
326 
327       // Handle [[foo:.*]].
328       VariableDefs[Name] = CurParen;
329       RegExStr += '(';
330       ++CurParen;
331 
332       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
333         return true;
334 
335       RegExStr += ')';
336     }
337 
338     // Handle fixed string matches.
339     // Find the end, which is the start of the next regex.
340     size_t FixedMatchEnd = PatternStr.find("{{");
341     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
342     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
343     PatternStr = PatternStr.substr(FixedMatchEnd);
344   }
345 
346   if (MatchFullLinesHere) {
347     if (!NoCanonicalizeWhiteSpace)
348       RegExStr += " *";
349     RegExStr += '$';
350   }
351 
352   return false;
353 }
354 
355 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
356                               SourceMgr &SM) {
357   Regex R(RS);
358   std::string Error;
359   if (!R.isValid(Error)) {
360     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
361                     "invalid regex: " + Error);
362     return true;
363   }
364 
365   RegExStr += RS.str();
366   CurParen += R.getNumMatches();
367   return false;
368 }
369 
370 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
371   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
372   std::string Backref = std::string("\\") +
373                         std::string(1, '0' + BackrefNum);
374   RegExStr += Backref;
375 }
376 
377 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
378   // The only supported expression is @LINE([\+-]\d+)?
379   if (!Expr.startswith("@LINE"))
380     return false;
381   Expr = Expr.substr(StringRef("@LINE").size());
382   int Offset = 0;
383   if (!Expr.empty()) {
384     if (Expr[0] == '+')
385       Expr = Expr.substr(1);
386     else if (Expr[0] != '-')
387       return false;
388     if (Expr.getAsInteger(10, Offset))
389       return false;
390   }
391   Value = llvm::itostr(LineNumber + Offset);
392   return true;
393 }
394 
395 /// Match - Match the pattern string against the input buffer Buffer.  This
396 /// returns the position that is matched or npos if there is no match.  If
397 /// there is a match, the size of the matched string is returned in MatchLen.
398 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
399                       StringMap<StringRef> &VariableTable) const {
400   // If this is the EOF pattern, match it immediately.
401   if (CheckTy == Check::CheckEOF) {
402     MatchLen = 0;
403     return Buffer.size();
404   }
405 
406   // If this is a fixed string pattern, just match it now.
407   if (!FixedStr.empty()) {
408     MatchLen = FixedStr.size();
409     return Buffer.find(FixedStr);
410   }
411 
412   // Regex match.
413 
414   // If there are variable uses, we need to create a temporary string with the
415   // actual value.
416   StringRef RegExToMatch = RegExStr;
417   std::string TmpStr;
418   if (!VariableUses.empty()) {
419     TmpStr = RegExStr;
420 
421     unsigned InsertOffset = 0;
422     for (const auto &VariableUse : VariableUses) {
423       std::string Value;
424 
425       if (VariableUse.first[0] == '@') {
426         if (!EvaluateExpression(VariableUse.first, Value))
427           return StringRef::npos;
428       } else {
429         StringMap<StringRef>::iterator it =
430             VariableTable.find(VariableUse.first);
431         // If the variable is undefined, return an error.
432         if (it == VariableTable.end())
433           return StringRef::npos;
434 
435         // Look up the value and escape it so that we can put it into the regex.
436         Value += Regex::escape(it->second);
437       }
438 
439       // Plop it into the regex at the adjusted offset.
440       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
441                     Value.begin(), Value.end());
442       InsertOffset += Value.size();
443     }
444 
445     // Match the newly constructed regex.
446     RegExToMatch = TmpStr;
447   }
448 
449 
450   SmallVector<StringRef, 4> MatchInfo;
451   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
452     return StringRef::npos;
453 
454   // Successful regex match.
455   assert(!MatchInfo.empty() && "Didn't get any match");
456   StringRef FullMatch = MatchInfo[0];
457 
458   // If this defines any variables, remember their values.
459   for (const auto &VariableDef : VariableDefs) {
460     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
461     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
462   }
463 
464   MatchLen = FullMatch.size();
465   return FullMatch.data()-Buffer.data();
466 }
467 
468 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
469                               const StringMap<StringRef> &VariableTable) const {
470   // Just compute the number of matching characters. For regular expressions, we
471   // just compare against the regex itself and hope for the best.
472   //
473   // FIXME: One easy improvement here is have the regex lib generate a single
474   // example regular expression which matches, and use that as the example
475   // string.
476   StringRef ExampleString(FixedStr);
477   if (ExampleString.empty())
478     ExampleString = RegExStr;
479 
480   // Only compare up to the first line in the buffer, or the string size.
481   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
482   BufferPrefix = BufferPrefix.split('\n').first;
483   return BufferPrefix.edit_distance(ExampleString);
484 }
485 
486 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
487                                const StringMap<StringRef> &VariableTable) const{
488   // If this was a regular expression using variables, print the current
489   // variable values.
490   if (!VariableUses.empty()) {
491     for (const auto &VariableUse : VariableUses) {
492       SmallString<256> Msg;
493       raw_svector_ostream OS(Msg);
494       StringRef Var = VariableUse.first;
495       if (Var[0] == '@') {
496         std::string Value;
497         if (EvaluateExpression(Var, Value)) {
498           OS << "with expression \"";
499           OS.write_escaped(Var) << "\" equal to \"";
500           OS.write_escaped(Value) << "\"";
501         } else {
502           OS << "uses incorrect expression \"";
503           OS.write_escaped(Var) << "\"";
504         }
505       } else {
506         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
507 
508         // Check for undefined variable references.
509         if (it == VariableTable.end()) {
510           OS << "uses undefined variable \"";
511           OS.write_escaped(Var) << "\"";
512         } else {
513           OS << "with variable \"";
514           OS.write_escaped(Var) << "\" equal to \"";
515           OS.write_escaped(it->second) << "\"";
516         }
517       }
518 
519       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
520                       OS.str());
521     }
522   }
523 
524   // Attempt to find the closest/best fuzzy match.  Usually an error happens
525   // because some string in the output didn't exactly match. In these cases, we
526   // would like to show the user a best guess at what "should have" matched, to
527   // save them having to actually check the input manually.
528   size_t NumLinesForward = 0;
529   size_t Best = StringRef::npos;
530   double BestQuality = 0;
531 
532   // Use an arbitrary 4k limit on how far we will search.
533   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
534     if (Buffer[i] == '\n')
535       ++NumLinesForward;
536 
537     // Patterns have leading whitespace stripped, so skip whitespace when
538     // looking for something which looks like a pattern.
539     if (Buffer[i] == ' ' || Buffer[i] == '\t')
540       continue;
541 
542     // Compute the "quality" of this match as an arbitrary combination of the
543     // match distance and the number of lines skipped to get to this match.
544     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
545     double Quality = Distance + (NumLinesForward / 100.);
546 
547     if (Quality < BestQuality || Best == StringRef::npos) {
548       Best = i;
549       BestQuality = Quality;
550     }
551   }
552 
553   // Print the "possible intended match here" line if we found something
554   // reasonable and not equal to what we showed in the "scanning from here"
555   // line.
556   if (Best && Best != StringRef::npos && BestQuality < 50) {
557       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
558                       SourceMgr::DK_Note, "possible intended match here");
559 
560     // FIXME: If we wanted to be really friendly we would show why the match
561     // failed, as it can be hard to spot simple one character differences.
562   }
563 }
564 
565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
566   // Offset keeps track of the current offset within the input Str
567   size_t Offset = 0;
568   // [...] Nesting depth
569   size_t BracketDepth = 0;
570 
571   while (!Str.empty()) {
572     if (Str.startswith("]]") && BracketDepth == 0)
573       return Offset;
574     if (Str[0] == '\\') {
575       // Backslash escapes the next char within regexes, so skip them both.
576       Str = Str.substr(2);
577       Offset += 2;
578     } else {
579       switch (Str[0]) {
580         default:
581           break;
582         case '[':
583           BracketDepth++;
584           break;
585         case ']':
586           if (BracketDepth == 0) {
587             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
588                             SourceMgr::DK_Error,
589                             "missing closing \"]\" for regex variable");
590             exit(1);
591           }
592           BracketDepth--;
593           break;
594       }
595       Str = Str.substr(1);
596       Offset++;
597     }
598   }
599 
600   return StringRef::npos;
601 }
602 
603 
604 //===----------------------------------------------------------------------===//
605 // Check Strings.
606 //===----------------------------------------------------------------------===//
607 
608 /// CheckString - This is a check that we found in the input file.
609 struct CheckString {
610   /// Pat - The pattern to match.
611   Pattern Pat;
612 
613   /// Prefix - Which prefix name this check matched.
614   StringRef Prefix;
615 
616   /// Loc - The location in the match file that the check string was specified.
617   SMLoc Loc;
618 
619   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
620   /// as opposed to a CHECK: directive.
621   //  Check::CheckType CheckTy;
622 
623   /// DagNotStrings - These are all of the strings that are disallowed from
624   /// occurring between this match string and the previous one (or start of
625   /// file).
626   std::vector<Pattern> DagNotStrings;
627 
628   CheckString(const Pattern &P, StringRef S, SMLoc L)
629       : Pat(P), Prefix(S), Loc(L) {}
630 
631   /// Check - Match check string and its "not strings" and/or "dag strings".
632   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
633                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
634 
635   /// CheckNext - Verify there is a single line in the given buffer.
636   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
637 
638   /// CheckSame - Verify there is no newline in the given buffer.
639   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
640 
641   /// CheckNot - Verify there's no "not strings" in the given buffer.
642   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
643                 const std::vector<const Pattern *> &NotStrings,
644                 StringMap<StringRef> &VariableTable) const;
645 
646   /// CheckDag - Match "dag strings" and their mixed "not strings".
647   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
648                   std::vector<const Pattern *> &NotStrings,
649                   StringMap<StringRef> &VariableTable) const;
650 };
651 
652 /// Canonicalize whitespaces in the input file. Line endings are replaced
653 /// with UNIX-style '\n'.
654 ///
655 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
656 /// characters to a single space.
657 static std::unique_ptr<MemoryBuffer>
658 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
659                       bool PreserveHorizontal) {
660   SmallString<128> NewFile;
661   NewFile.reserve(MB->getBufferSize());
662 
663   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
664        Ptr != End; ++Ptr) {
665     // Eliminate trailing dosish \r.
666     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
667       continue;
668     }
669 
670     // If current char is not a horizontal whitespace or if horizontal
671     // whitespace canonicalization is disabled, dump it to output as is.
672     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
673       NewFile.push_back(*Ptr);
674       continue;
675     }
676 
677     // Otherwise, add one space and advance over neighboring space.
678     NewFile.push_back(' ');
679     while (Ptr+1 != End &&
680            (Ptr[1] == ' ' || Ptr[1] == '\t'))
681       ++Ptr;
682   }
683 
684   return std::unique_ptr<MemoryBuffer>(
685       MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
686 }
687 
688 static bool IsPartOfWord(char c) {
689   return (isalnum(c) || c == '-' || c == '_');
690 }
691 
692 // Get the size of the prefix extension.
693 static size_t CheckTypeSize(Check::CheckType Ty) {
694   switch (Ty) {
695   case Check::CheckNone:
696     return 0;
697 
698   case Check::CheckPlain:
699     return sizeof(":") - 1;
700 
701   case Check::CheckNext:
702     return sizeof("-NEXT:") - 1;
703 
704   case Check::CheckSame:
705     return sizeof("-SAME:") - 1;
706 
707   case Check::CheckNot:
708     return sizeof("-NOT:") - 1;
709 
710   case Check::CheckDAG:
711     return sizeof("-DAG:") - 1;
712 
713   case Check::CheckLabel:
714     return sizeof("-LABEL:") - 1;
715 
716   case Check::CheckEOF:
717     llvm_unreachable("Should not be using EOF size");
718   }
719 
720   llvm_unreachable("Bad check type");
721 }
722 
723 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
724   char NextChar = Buffer[Prefix.size()];
725 
726   // Verify that the : is present after the prefix.
727   if (NextChar == ':')
728     return Check::CheckPlain;
729 
730   if (NextChar != '-')
731     return Check::CheckNone;
732 
733   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
734   if (Rest.startswith("NEXT:"))
735     return Check::CheckNext;
736 
737   if (Rest.startswith("SAME:"))
738     return Check::CheckSame;
739 
740   if (Rest.startswith("NOT:"))
741     return Check::CheckNot;
742 
743   if (Rest.startswith("DAG:"))
744     return Check::CheckDAG;
745 
746   if (Rest.startswith("LABEL:"))
747     return Check::CheckLabel;
748 
749   return Check::CheckNone;
750 }
751 
752 // From the given position, find the next character after the word.
753 static size_t SkipWord(StringRef Str, size_t Loc) {
754   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
755     ++Loc;
756   return Loc;
757 }
758 
759 // Try to find the first match in buffer for any prefix. If a valid match is
760 // found, return that prefix and set its type and location.  If there are almost
761 // matches (e.g. the actual prefix string is found, but is not an actual check
762 // string), but no valid match, return an empty string and set the position to
763 // resume searching from. If no partial matches are found, return an empty
764 // string and the location will be StringRef::npos. If one prefix is a substring
765 // of another, the maximal match should be found. e.g. if "A" and "AA" are
766 // prefixes then AA-CHECK: should match the second one.
767 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
768                                          Check::CheckType &CheckTy,
769                                          size_t &CheckLoc) {
770   StringRef FirstPrefix;
771   size_t FirstLoc = StringRef::npos;
772   size_t SearchLoc = StringRef::npos;
773   Check::CheckType FirstTy = Check::CheckNone;
774 
775   CheckTy = Check::CheckNone;
776   CheckLoc = StringRef::npos;
777 
778   for (StringRef Prefix : CheckPrefixes) {
779     size_t PrefixLoc = Buffer.find(Prefix);
780 
781     if (PrefixLoc == StringRef::npos)
782       continue;
783 
784     // Track where we are searching for invalid prefixes that look almost right.
785     // We need to only advance to the first partial match on the next attempt
786     // since a partial match could be a substring of a later, valid prefix.
787     // Need to skip to the end of the word, otherwise we could end up
788     // matching a prefix in a substring later.
789     if (PrefixLoc < SearchLoc)
790       SearchLoc = SkipWord(Buffer, PrefixLoc);
791 
792     // We only want to find the first match to avoid skipping some.
793     if (PrefixLoc > FirstLoc)
794       continue;
795     // If one matching check-prefix is a prefix of another, choose the
796     // longer one.
797     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
798       continue;
799 
800     StringRef Rest = Buffer.drop_front(PrefixLoc);
801     // Make sure we have actually found the prefix, and not a word containing
802     // it. This should also prevent matching the wrong prefix when one is a
803     // substring of another.
804     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
805       FirstTy = Check::CheckNone;
806     else
807       FirstTy = FindCheckType(Rest, Prefix);
808 
809     FirstLoc = PrefixLoc;
810     FirstPrefix = Prefix;
811   }
812 
813   // If the first prefix is invalid, we should continue the search after it.
814   if (FirstTy == Check::CheckNone) {
815     CheckLoc = SearchLoc;
816     return "";
817   }
818 
819   CheckTy = FirstTy;
820   CheckLoc = FirstLoc;
821   return FirstPrefix;
822 }
823 
824 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
825                                          unsigned &LineNumber,
826                                          Check::CheckType &CheckTy,
827                                          size_t &CheckLoc) {
828   while (!Buffer.empty()) {
829     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
830     // If we found a real match, we are done.
831     if (!Prefix.empty()) {
832       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
833       return Prefix;
834     }
835 
836     // We didn't find any almost matches either, we are also done.
837     if (CheckLoc == StringRef::npos)
838       return StringRef();
839 
840     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
841 
842     // Advance to the last possible match we found and try again.
843     Buffer = Buffer.drop_front(CheckLoc + 1);
844   }
845 
846   return StringRef();
847 }
848 
849 /// ReadCheckFile - Read the check file, which specifies the sequence of
850 /// expected strings.  The strings are added to the CheckStrings vector.
851 /// Returns true in case of an error, false otherwise.
852 static bool ReadCheckFile(SourceMgr &SM,
853                           std::vector<CheckString> &CheckStrings) {
854   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
855       MemoryBuffer::getFileOrSTDIN(CheckFilename);
856   if (std::error_code EC = FileOrErr.getError()) {
857     errs() << "Could not open check file '" << CheckFilename
858            << "': " << EC.message() << '\n';
859     return true;
860   }
861 
862   // If we want to canonicalize whitespace, strip excess whitespace from the
863   // buffer containing the CHECK lines. Remove DOS style line endings.
864   std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
865       std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
866 
867   // Find all instances of CheckPrefix followed by : in the file.
868   StringRef Buffer = F->getBuffer();
869 
870   SM.AddNewSourceBuffer(std::move(F), SMLoc());
871 
872   std::vector<Pattern> ImplicitNegativeChecks;
873   for (const auto &PatternString : ImplicitCheckNot) {
874     // Create a buffer with fake command line content in order to display the
875     // command line option responsible for the specific implicit CHECK-NOT.
876     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
877     std::string Suffix = "'";
878     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
879         Prefix + PatternString + Suffix, "command line");
880 
881     StringRef PatternInBuffer =
882         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
883     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
884 
885     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
886     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
887                                                "IMPLICIT-CHECK", SM, 0);
888   }
889 
890 
891   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
892 
893   // LineNumber keeps track of the line on which CheckPrefix instances are
894   // found.
895   unsigned LineNumber = 1;
896 
897   while (1) {
898     Check::CheckType CheckTy;
899     size_t PrefixLoc;
900 
901     // See if a prefix occurs in the memory buffer.
902     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
903                                                    LineNumber,
904                                                    CheckTy,
905                                                    PrefixLoc);
906     if (UsedPrefix.empty())
907       break;
908 
909     Buffer = Buffer.drop_front(PrefixLoc);
910 
911     // Location to use for error messages.
912     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
913 
914     // PrefixLoc is to the start of the prefix. Skip to the end.
915     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
916 
917     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
918     // leading and trailing whitespace.
919     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
920 
921     // Scan ahead to the end of line.
922     size_t EOL = Buffer.find_first_of("\n\r");
923 
924     // Remember the location of the start of the pattern, for diagnostics.
925     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
926 
927     // Parse the pattern.
928     Pattern P(CheckTy);
929     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
930       return true;
931 
932     // Verify that CHECK-LABEL lines do not define or use variables
933     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
934       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
935                       SourceMgr::DK_Error,
936                       "found '" + UsedPrefix + "-LABEL:'"
937                       " with variable definition or use");
938       return true;
939     }
940 
941     Buffer = Buffer.substr(EOL);
942 
943     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
944     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
945         CheckStrings.empty()) {
946       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
947       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
948                       SourceMgr::DK_Error,
949                       "found '" + UsedPrefix + "-" + Type + "' without previous '"
950                       + UsedPrefix + ": line");
951       return true;
952     }
953 
954     // Handle CHECK-DAG/-NOT.
955     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
956       DagNotMatches.push_back(P);
957       continue;
958     }
959 
960     // Okay, add the string we captured to the output vector and move on.
961     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
962     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
963     DagNotMatches = ImplicitNegativeChecks;
964   }
965 
966   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
967   // prefix as a filler for the error message.
968   if (!DagNotMatches.empty()) {
969     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
970                               SMLoc::getFromPointer(Buffer.data()));
971     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
972   }
973 
974   if (CheckStrings.empty()) {
975     errs() << "error: no check strings found with prefix"
976            << (CheckPrefixes.size() > 1 ? "es " : " ");
977     prefix_iterator I = CheckPrefixes.begin();
978     prefix_iterator E = CheckPrefixes.end();
979     if (I != E) {
980       errs() << "\'" << *I << ":'";
981       ++I;
982     }
983     for (; I != E; ++I)
984       errs() << ", \'" << *I << ":'";
985 
986     errs() << '\n';
987     return true;
988   }
989 
990   return false;
991 }
992 
993 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
994                              const Pattern &Pat, StringRef Buffer,
995                              StringMap<StringRef> &VariableTable) {
996   // Otherwise, we have an error, emit an error message.
997   SM.PrintMessage(Loc, SourceMgr::DK_Error,
998                   "expected string not found in input");
999 
1000   // Print the "scanning from here" line.  If the current position is at the
1001   // end of a line, advance to the start of the next line.
1002   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1003 
1004   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1005                   "scanning from here");
1006 
1007   // Allow the pattern to print additional information if desired.
1008   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1009 }
1010 
1011 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1012                              StringRef Buffer,
1013                              StringMap<StringRef> &VariableTable) {
1014   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1015 }
1016 
1017 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1018 /// range.
1019 static unsigned CountNumNewlinesBetween(StringRef Range,
1020                                         const char *&FirstNewLine) {
1021   unsigned NumNewLines = 0;
1022   while (1) {
1023     // Scan for newline.
1024     Range = Range.substr(Range.find_first_of("\n\r"));
1025     if (Range.empty()) return NumNewLines;
1026 
1027     ++NumNewLines;
1028 
1029     // Handle \n\r and \r\n as a single newline.
1030     if (Range.size() > 1 &&
1031         (Range[1] == '\n' || Range[1] == '\r') &&
1032         (Range[0] != Range[1]))
1033       Range = Range.substr(1);
1034     Range = Range.substr(1);
1035 
1036     if (NumNewLines == 1)
1037       FirstNewLine = Range.begin();
1038   }
1039 }
1040 
1041 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1042                           bool IsLabelScanMode, size_t &MatchLen,
1043                           StringMap<StringRef> &VariableTable) const {
1044   size_t LastPos = 0;
1045   std::vector<const Pattern *> NotStrings;
1046 
1047   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1048   // bounds; we have not processed variable definitions within the bounded block
1049   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1050   // over the block again (including the last CHECK-LABEL) in normal mode.
1051   if (!IsLabelScanMode) {
1052     // Match "dag strings" (with mixed "not strings" if any).
1053     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1054     if (LastPos == StringRef::npos)
1055       return StringRef::npos;
1056   }
1057 
1058   // Match itself from the last position after matching CHECK-DAG.
1059   StringRef MatchBuffer = Buffer.substr(LastPos);
1060   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1061   if (MatchPos == StringRef::npos) {
1062     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1063     return StringRef::npos;
1064   }
1065 
1066   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1067   // or CHECK-NOT
1068   if (!IsLabelScanMode) {
1069     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1070 
1071     // If this check is a "CHECK-NEXT", verify that the previous match was on
1072     // the previous line (i.e. that there is one newline between them).
1073     if (CheckNext(SM, SkippedRegion))
1074       return StringRef::npos;
1075 
1076     // If this check is a "CHECK-SAME", verify that the previous match was on
1077     // the same line (i.e. that there is no newline between them).
1078     if (CheckSame(SM, SkippedRegion))
1079       return StringRef::npos;
1080 
1081     // If this match had "not strings", verify that they don't exist in the
1082     // skipped region.
1083     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1084       return StringRef::npos;
1085   }
1086 
1087   return LastPos + MatchPos;
1088 }
1089 
1090 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1091   if (Pat.getCheckTy() != Check::CheckNext)
1092     return false;
1093 
1094   // Count the number of newlines between the previous match and this one.
1095   assert(Buffer.data() !=
1096          SM.getMemoryBuffer(
1097            SM.FindBufferContainingLoc(
1098              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1099          "CHECK-NEXT can't be the first check in a file");
1100 
1101   const char *FirstNewLine = nullptr;
1102   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1103 
1104   if (NumNewLines == 0) {
1105     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1106                     "-NEXT: is on the same line as previous match");
1107     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1108                     SourceMgr::DK_Note, "'next' match was here");
1109     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1110                     "previous match ended here");
1111     return true;
1112   }
1113 
1114   if (NumNewLines != 1) {
1115     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1116                     "-NEXT: is not on the line after the previous match");
1117     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1118                     SourceMgr::DK_Note, "'next' match was here");
1119     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1120                     "previous match ended here");
1121     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1122                     "non-matching line after previous match is here");
1123     return true;
1124   }
1125 
1126   return false;
1127 }
1128 
1129 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1130   if (Pat.getCheckTy() != Check::CheckSame)
1131     return false;
1132 
1133   // Count the number of newlines between the previous match and this one.
1134   assert(Buffer.data() !=
1135              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1136                                     SMLoc::getFromPointer(Buffer.data())))
1137                  ->getBufferStart() &&
1138          "CHECK-SAME can't be the first check in a file");
1139 
1140   const char *FirstNewLine = nullptr;
1141   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1142 
1143   if (NumNewLines != 0) {
1144     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1145                     Prefix +
1146                         "-SAME: is not on the same line as the previous match");
1147     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1148                     "'next' match was here");
1149     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1150                     "previous match ended here");
1151     return true;
1152   }
1153 
1154   return false;
1155 }
1156 
1157 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1158                            const std::vector<const Pattern *> &NotStrings,
1159                            StringMap<StringRef> &VariableTable) const {
1160   for (const Pattern *Pat : NotStrings) {
1161     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1162 
1163     size_t MatchLen = 0;
1164     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1165 
1166     if (Pos == StringRef::npos) continue;
1167 
1168     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1169                     SourceMgr::DK_Error,
1170                     Prefix + "-NOT: string occurred!");
1171     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1172                     Prefix + "-NOT: pattern specified here");
1173     return true;
1174   }
1175 
1176   return false;
1177 }
1178 
1179 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1180                              std::vector<const Pattern *> &NotStrings,
1181                              StringMap<StringRef> &VariableTable) const {
1182   if (DagNotStrings.empty())
1183     return 0;
1184 
1185   size_t LastPos = 0;
1186   size_t StartPos = LastPos;
1187 
1188   for (const Pattern &Pat : DagNotStrings) {
1189     assert((Pat.getCheckTy() == Check::CheckDAG ||
1190             Pat.getCheckTy() == Check::CheckNot) &&
1191            "Invalid CHECK-DAG or CHECK-NOT!");
1192 
1193     if (Pat.getCheckTy() == Check::CheckNot) {
1194       NotStrings.push_back(&Pat);
1195       continue;
1196     }
1197 
1198     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1199 
1200     size_t MatchLen = 0, MatchPos;
1201 
1202     // CHECK-DAG always matches from the start.
1203     StringRef MatchBuffer = Buffer.substr(StartPos);
1204     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1205     // With a group of CHECK-DAGs, a single mismatching means the match on
1206     // that group of CHECK-DAGs fails immediately.
1207     if (MatchPos == StringRef::npos) {
1208       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1209       return StringRef::npos;
1210     }
1211     // Re-calc it as the offset relative to the start of the original string.
1212     MatchPos += StartPos;
1213 
1214     if (!NotStrings.empty()) {
1215       if (MatchPos < LastPos) {
1216         // Reordered?
1217         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1218                         SourceMgr::DK_Error,
1219                         Prefix + "-DAG: found a match of CHECK-DAG"
1220                         " reordering across a CHECK-NOT");
1221         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1222                         SourceMgr::DK_Note,
1223                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1224                         " is found here");
1225         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1226                         Prefix + "-NOT: the crossed pattern specified"
1227                         " here");
1228         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1229                         Prefix + "-DAG: the reordered pattern specified"
1230                         " here");
1231         return StringRef::npos;
1232       }
1233       // All subsequent CHECK-DAGs should be matched from the farthest
1234       // position of all precedent CHECK-DAGs (including this one.)
1235       StartPos = LastPos;
1236       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1237       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1238       // region.
1239       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1240       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1241         return StringRef::npos;
1242       // Clear "not strings".
1243       NotStrings.clear();
1244     }
1245 
1246     // Update the last position with CHECK-DAG matches.
1247     LastPos = std::max(MatchPos + MatchLen, LastPos);
1248   }
1249 
1250   return LastPos;
1251 }
1252 
1253 // A check prefix must contain only alphanumeric, hyphens and underscores.
1254 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1255   Regex Validator("^[a-zA-Z0-9_-]*$");
1256   return Validator.match(CheckPrefix);
1257 }
1258 
1259 static bool ValidateCheckPrefixes() {
1260   StringSet<> PrefixSet;
1261 
1262   for (StringRef Prefix : CheckPrefixes) {
1263     // Reject empty prefixes.
1264     if (Prefix == "")
1265       return false;
1266 
1267     if (!PrefixSet.insert(Prefix).second)
1268       return false;
1269 
1270     if (!ValidateCheckPrefix(Prefix))
1271       return false;
1272   }
1273 
1274   return true;
1275 }
1276 
1277 // I don't think there's a way to specify an initial value for cl::list,
1278 // so if nothing was specified, add the default
1279 static void AddCheckPrefixIfNeeded() {
1280   if (CheckPrefixes.empty())
1281     CheckPrefixes.push_back("CHECK");
1282 }
1283 
1284 int main(int argc, char **argv) {
1285   sys::PrintStackTraceOnErrorSignal();
1286   PrettyStackTraceProgram X(argc, argv);
1287   cl::ParseCommandLineOptions(argc, argv);
1288 
1289   if (!ValidateCheckPrefixes()) {
1290     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1291               "start with a letter and contain only alphanumeric characters, "
1292               "hyphens and underscores\n";
1293     return 2;
1294   }
1295 
1296   AddCheckPrefixIfNeeded();
1297 
1298   SourceMgr SM;
1299 
1300   // Read the expected strings from the check file.
1301   std::vector<CheckString> CheckStrings;
1302   if (ReadCheckFile(SM, CheckStrings))
1303     return 2;
1304 
1305   // Open the file to check and add it to SourceMgr.
1306   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1307       MemoryBuffer::getFileOrSTDIN(InputFilename);
1308   if (std::error_code EC = FileOrErr.getError()) {
1309     errs() << "Could not open input file '" << InputFilename
1310            << "': " << EC.message() << '\n';
1311     return 2;
1312   }
1313   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1314 
1315   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1316     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1317     return 2;
1318   }
1319 
1320   // Remove duplicate spaces in the input file if requested.
1321   // Remove DOS style line endings.
1322   std::unique_ptr<MemoryBuffer> F =
1323       CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1324 
1325   // Check that we have all of the expected strings, in order, in the input
1326   // file.
1327   StringRef Buffer = F->getBuffer();
1328 
1329   SM.AddNewSourceBuffer(std::move(F), SMLoc());
1330 
1331   /// VariableTable - This holds all the current filecheck variables.
1332   StringMap<StringRef> VariableTable;
1333 
1334   bool hasError = false;
1335 
1336   unsigned i = 0, j = 0, e = CheckStrings.size();
1337 
1338   while (true) {
1339     StringRef CheckRegion;
1340     if (j == e) {
1341       CheckRegion = Buffer;
1342     } else {
1343       const CheckString &CheckLabelStr = CheckStrings[j];
1344       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1345         ++j;
1346         continue;
1347       }
1348 
1349       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1350       size_t MatchLabelLen = 0;
1351       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1352                                                  MatchLabelLen, VariableTable);
1353       if (MatchLabelPos == StringRef::npos) {
1354         hasError = true;
1355         break;
1356       }
1357 
1358       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1359       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1360       ++j;
1361     }
1362 
1363     for ( ; i != j; ++i) {
1364       const CheckString &CheckStr = CheckStrings[i];
1365 
1366       // Check each string within the scanned region, including a second check
1367       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1368       size_t MatchLen = 0;
1369       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1370                                        VariableTable);
1371 
1372       if (MatchPos == StringRef::npos) {
1373         hasError = true;
1374         i = j;
1375         break;
1376       }
1377 
1378       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1379     }
1380 
1381     if (j == e)
1382       break;
1383   }
1384 
1385   return hasError ? 1 : 0;
1386 }
1387