xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision e8f2fb206103fc4c990d369cacd23a43a4a33fdf)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43                   cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string> CheckPrefixes(
46     "check-prefix",
47     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool> NoCanonicalizeWhiteSpace(
55     "strict-whitespace",
56     cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::opt<bool> AllowEmptyInput(
66     "allow-empty", cl::init(false),
67     cl::desc("Allow the input file to be empty. This is useful when making\n"
68              "checks that some error message does not occur, for example."));
69 
70 static cl::opt<bool> MatchFullLines(
71     "match-full-lines", cl::init(false),
72     cl::desc("Require all positive matches to cover an entire input line.\n"
73              "Allows leading and trailing whitespace if --strict-whitespace\n"
74              "is not also passed."));
75 
76 typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
82 namespace Check {
83 enum CheckType {
84   CheckNone = 0,
85   CheckPlain,
86   CheckNext,
87   CheckSame,
88   CheckNot,
89   CheckDAG,
90   CheckLabel,
91 
92   /// MatchEOF - When set, this pattern only matches the end of file. This is
93   /// used for trailing CHECK-NOTs.
94   CheckEOF,
95   /// CheckBadNot - Found -NOT combined with another CHECK suffix.
96   CheckBadNot
97 };
98 }
99 
100 class Pattern {
101   SMLoc PatternLoc;
102 
103   /// FixedStr - If non-empty, this pattern is a fixed string match with the
104   /// specified fixed string.
105   StringRef FixedStr;
106 
107   /// RegEx - If non-empty, this is a regex pattern.
108   std::string RegExStr;
109 
110   /// VariableUses - Entries in this vector map to uses of a variable in the
111   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
112   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
113   /// value of bar at offset 3.
114   std::vector<std::pair<StringRef, unsigned>> VariableUses;
115 
116   /// VariableDefs - Maps definitions of variables to their parenthesized
117   /// capture numbers.
118   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
119   std::map<StringRef, unsigned> VariableDefs;
120 
121   Check::CheckType CheckTy;
122 
123   /// \brief Contains the number of line this pattern is in.
124   unsigned LineNumber;
125 
126 public:
127   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
128 
129   /// getLoc - Return the location in source code.
130   SMLoc getLoc() const { return PatternLoc; }
131 
132   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
133   /// which prefix is being matched, SM provides the SourceMgr used for error
134   /// reports, and LineNumber is the line number in the input file from which
135   /// the pattern string was read.  Returns true in case of an error, false
136   /// otherwise.
137   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
138                     unsigned LineNumber);
139 
140   /// Match - Match the pattern string against the input buffer Buffer.  This
141   /// returns the position that is matched or npos if there is no match.  If
142   /// there is a match, the size of the matched string is returned in MatchLen.
143   ///
144   /// The VariableTable StringMap provides the current values of filecheck
145   /// variables and is updated if this match defines new values.
146   size_t Match(StringRef Buffer, size_t &MatchLen,
147                StringMap<StringRef> &VariableTable) const;
148 
149   /// PrintFailureInfo - Print additional information about a failure to match
150   /// involving this pattern.
151   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
152                         const StringMap<StringRef> &VariableTable) const;
153 
154   bool hasVariable() const {
155     return !(VariableUses.empty() && VariableDefs.empty());
156   }
157 
158   Check::CheckType getCheckTy() const { return CheckTy; }
159 
160 private:
161   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
162   void AddBackrefToRegEx(unsigned BackrefNum);
163 
164   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
165   /// matching this pattern at the start of \arg Buffer; a distance of zero
166   /// should correspond to a perfect match.
167   unsigned
168   ComputeMatchDistance(StringRef Buffer,
169                        const StringMap<StringRef> &VariableTable) const;
170 
171   /// \brief Evaluates expression and stores the result to \p Value.
172   /// \return true on success. false when the expression has invalid syntax.
173   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
174 
175   /// \brief Finds the closing sequence of a regex variable usage or
176   /// definition. Str has to point in the beginning of the definition
177   /// (right after the opening sequence).
178   /// \return offset of the closing sequence within Str, or npos if it was not
179   /// found.
180   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
181 };
182 
183 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
184                            SourceMgr &SM, unsigned LineNumber) {
185   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
186 
187   this->LineNumber = LineNumber;
188   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
189 
190   // Ignore trailing whitespace.
191   while (!PatternStr.empty() &&
192          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
193     PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
194 
195   // Check that there is something on the line.
196   if (PatternStr.empty()) {
197     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
198                     "found empty check string with prefix '" + Prefix + ":'");
199     return true;
200   }
201 
202   // Check to see if this is a fixed string, or if it has regex pieces.
203   if (!MatchFullLinesHere &&
204       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
205                                  PatternStr.find("[[") == StringRef::npos))) {
206     FixedStr = PatternStr;
207     return false;
208   }
209 
210   if (MatchFullLinesHere) {
211     RegExStr += '^';
212     if (!NoCanonicalizeWhiteSpace)
213       RegExStr += " *";
214   }
215 
216   // Paren value #0 is for the fully matched string.  Any new parenthesized
217   // values add from there.
218   unsigned CurParen = 1;
219 
220   // Otherwise, there is at least one regex piece.  Build up the regex pattern
221   // by escaping scary characters in fixed strings, building up one big regex.
222   while (!PatternStr.empty()) {
223     // RegEx matches.
224     if (PatternStr.startswith("{{")) {
225       // This is the start of a regex match.  Scan for the }}.
226       size_t End = PatternStr.find("}}");
227       if (End == StringRef::npos) {
228         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
229                         SourceMgr::DK_Error,
230                         "found start of regex string with no end '}}'");
231         return true;
232       }
233 
234       // Enclose {{}} patterns in parens just like [[]] even though we're not
235       // capturing the result for any purpose.  This is required in case the
236       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
237       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
238       RegExStr += '(';
239       ++CurParen;
240 
241       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
242         return true;
243       RegExStr += ')';
244 
245       PatternStr = PatternStr.substr(End + 2);
246       continue;
247     }
248 
249     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
250     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
251     // second form is [[foo]] which is a reference to foo.  The variable name
252     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
253     // it.  This is to catch some common errors.
254     if (PatternStr.startswith("[[")) {
255       // Find the closing bracket pair ending the match.  End is going to be an
256       // offset relative to the beginning of the match string.
257       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
258 
259       if (End == StringRef::npos) {
260         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
261                         SourceMgr::DK_Error,
262                         "invalid named regex reference, no ]] found");
263         return true;
264       }
265 
266       StringRef MatchStr = PatternStr.substr(2, End);
267       PatternStr = PatternStr.substr(End + 4);
268 
269       // Get the regex name (e.g. "foo").
270       size_t NameEnd = MatchStr.find(':');
271       StringRef Name = MatchStr.substr(0, NameEnd);
272 
273       if (Name.empty()) {
274         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
275                         "invalid name in named regex: empty name");
276         return true;
277       }
278 
279       // Verify that the name/expression is well formed. FileCheck currently
280       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
281       // is relaxed, more strict check is performed in \c EvaluateExpression.
282       bool IsExpression = false;
283       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
284         if (i == 0 && Name[i] == '@') {
285           if (NameEnd != StringRef::npos) {
286             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
287                             SourceMgr::DK_Error,
288                             "invalid name in named regex definition");
289             return true;
290           }
291           IsExpression = true;
292           continue;
293         }
294         if (Name[i] != '_' && !isalnum(Name[i]) &&
295             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
296           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
297                           SourceMgr::DK_Error, "invalid name in named regex");
298           return true;
299         }
300       }
301 
302       // Name can't start with a digit.
303       if (isdigit(static_cast<unsigned char>(Name[0]))) {
304         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
305                         "invalid name in named regex");
306         return true;
307       }
308 
309       // Handle [[foo]].
310       if (NameEnd == StringRef::npos) {
311         // Handle variables that were defined earlier on the same line by
312         // emitting a backreference.
313         if (VariableDefs.find(Name) != VariableDefs.end()) {
314           unsigned VarParenNum = VariableDefs[Name];
315           if (VarParenNum < 1 || VarParenNum > 9) {
316             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
317                             SourceMgr::DK_Error,
318                             "Can't back-reference more than 9 variables");
319             return true;
320           }
321           AddBackrefToRegEx(VarParenNum);
322         } else {
323           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
324         }
325         continue;
326       }
327 
328       // Handle [[foo:.*]].
329       VariableDefs[Name] = CurParen;
330       RegExStr += '(';
331       ++CurParen;
332 
333       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
334         return true;
335 
336       RegExStr += ')';
337     }
338 
339     // Handle fixed string matches.
340     // Find the end, which is the start of the next regex.
341     size_t FixedMatchEnd = PatternStr.find("{{");
342     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
343     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
344     PatternStr = PatternStr.substr(FixedMatchEnd);
345   }
346 
347   if (MatchFullLinesHere) {
348     if (!NoCanonicalizeWhiteSpace)
349       RegExStr += " *";
350     RegExStr += '$';
351   }
352 
353   return false;
354 }
355 
356 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
357   Regex R(RS);
358   std::string Error;
359   if (!R.isValid(Error)) {
360     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
361                     "invalid regex: " + Error);
362     return true;
363   }
364 
365   RegExStr += RS.str();
366   CurParen += R.getNumMatches();
367   return false;
368 }
369 
370 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
371   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
372   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
373   RegExStr += Backref;
374 }
375 
376 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
377   // The only supported expression is @LINE([\+-]\d+)?
378   if (!Expr.startswith("@LINE"))
379     return false;
380   Expr = Expr.substr(StringRef("@LINE").size());
381   int Offset = 0;
382   if (!Expr.empty()) {
383     if (Expr[0] == '+')
384       Expr = Expr.substr(1);
385     else if (Expr[0] != '-')
386       return false;
387     if (Expr.getAsInteger(10, Offset))
388       return false;
389   }
390   Value = llvm::itostr(LineNumber + Offset);
391   return true;
392 }
393 
394 /// Match - Match the pattern string against the input buffer Buffer.  This
395 /// returns the position that is matched or npos if there is no match.  If
396 /// there is a match, the size of the matched string is returned in MatchLen.
397 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
398                       StringMap<StringRef> &VariableTable) const {
399   // If this is the EOF pattern, match it immediately.
400   if (CheckTy == Check::CheckEOF) {
401     MatchLen = 0;
402     return Buffer.size();
403   }
404 
405   // If this is a fixed string pattern, just match it now.
406   if (!FixedStr.empty()) {
407     MatchLen = FixedStr.size();
408     return Buffer.find(FixedStr);
409   }
410 
411   // Regex match.
412 
413   // If there are variable uses, we need to create a temporary string with the
414   // actual value.
415   StringRef RegExToMatch = RegExStr;
416   std::string TmpStr;
417   if (!VariableUses.empty()) {
418     TmpStr = RegExStr;
419 
420     unsigned InsertOffset = 0;
421     for (const auto &VariableUse : VariableUses) {
422       std::string Value;
423 
424       if (VariableUse.first[0] == '@') {
425         if (!EvaluateExpression(VariableUse.first, Value))
426           return StringRef::npos;
427       } else {
428         StringMap<StringRef>::iterator it =
429             VariableTable.find(VariableUse.first);
430         // If the variable is undefined, return an error.
431         if (it == VariableTable.end())
432           return StringRef::npos;
433 
434         // Look up the value and escape it so that we can put it into the regex.
435         Value += Regex::escape(it->second);
436       }
437 
438       // Plop it into the regex at the adjusted offset.
439       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
440                     Value.begin(), Value.end());
441       InsertOffset += Value.size();
442     }
443 
444     // Match the newly constructed regex.
445     RegExToMatch = TmpStr;
446   }
447 
448   SmallVector<StringRef, 4> MatchInfo;
449   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
450     return StringRef::npos;
451 
452   // Successful regex match.
453   assert(!MatchInfo.empty() && "Didn't get any match");
454   StringRef FullMatch = MatchInfo[0];
455 
456   // If this defines any variables, remember their values.
457   for (const auto &VariableDef : VariableDefs) {
458     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
459     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
460   }
461 
462   MatchLen = FullMatch.size();
463   return FullMatch.data() - Buffer.data();
464 }
465 
466 unsigned
467 Pattern::ComputeMatchDistance(StringRef Buffer,
468                               const StringMap<StringRef> &VariableTable) const {
469   // Just compute the number of matching characters. For regular expressions, we
470   // just compare against the regex itself and hope for the best.
471   //
472   // FIXME: One easy improvement here is have the regex lib generate a single
473   // example regular expression which matches, and use that as the example
474   // string.
475   StringRef ExampleString(FixedStr);
476   if (ExampleString.empty())
477     ExampleString = RegExStr;
478 
479   // Only compare up to the first line in the buffer, or the string size.
480   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
481   BufferPrefix = BufferPrefix.split('\n').first;
482   return BufferPrefix.edit_distance(ExampleString);
483 }
484 
485 void Pattern::PrintFailureInfo(
486     const SourceMgr &SM, StringRef Buffer,
487     const StringMap<StringRef> &VariableTable) const {
488   // If this was a regular expression using variables, print the current
489   // variable values.
490   if (!VariableUses.empty()) {
491     for (const auto &VariableUse : VariableUses) {
492       SmallString<256> Msg;
493       raw_svector_ostream OS(Msg);
494       StringRef Var = VariableUse.first;
495       if (Var[0] == '@') {
496         std::string Value;
497         if (EvaluateExpression(Var, Value)) {
498           OS << "with expression \"";
499           OS.write_escaped(Var) << "\" equal to \"";
500           OS.write_escaped(Value) << "\"";
501         } else {
502           OS << "uses incorrect expression \"";
503           OS.write_escaped(Var) << "\"";
504         }
505       } else {
506         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
507 
508         // Check for undefined variable references.
509         if (it == VariableTable.end()) {
510           OS << "uses undefined variable \"";
511           OS.write_escaped(Var) << "\"";
512         } else {
513           OS << "with variable \"";
514           OS.write_escaped(Var) << "\" equal to \"";
515           OS.write_escaped(it->second) << "\"";
516         }
517       }
518 
519       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
520                       OS.str());
521     }
522   }
523 
524   // Attempt to find the closest/best fuzzy match.  Usually an error happens
525   // because some string in the output didn't exactly match. In these cases, we
526   // would like to show the user a best guess at what "should have" matched, to
527   // save them having to actually check the input manually.
528   size_t NumLinesForward = 0;
529   size_t Best = StringRef::npos;
530   double BestQuality = 0;
531 
532   // Use an arbitrary 4k limit on how far we will search.
533   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
534     if (Buffer[i] == '\n')
535       ++NumLinesForward;
536 
537     // Patterns have leading whitespace stripped, so skip whitespace when
538     // looking for something which looks like a pattern.
539     if (Buffer[i] == ' ' || Buffer[i] == '\t')
540       continue;
541 
542     // Compute the "quality" of this match as an arbitrary combination of the
543     // match distance and the number of lines skipped to get to this match.
544     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
545     double Quality = Distance + (NumLinesForward / 100.);
546 
547     if (Quality < BestQuality || Best == StringRef::npos) {
548       Best = i;
549       BestQuality = Quality;
550     }
551   }
552 
553   // Print the "possible intended match here" line if we found something
554   // reasonable and not equal to what we showed in the "scanning from here"
555   // line.
556   if (Best && Best != StringRef::npos && BestQuality < 50) {
557     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
558                     SourceMgr::DK_Note, "possible intended match here");
559 
560     // FIXME: If we wanted to be really friendly we would show why the match
561     // failed, as it can be hard to spot simple one character differences.
562   }
563 }
564 
565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
566   // Offset keeps track of the current offset within the input Str
567   size_t Offset = 0;
568   // [...] Nesting depth
569   size_t BracketDepth = 0;
570 
571   while (!Str.empty()) {
572     if (Str.startswith("]]") && BracketDepth == 0)
573       return Offset;
574     if (Str[0] == '\\') {
575       // Backslash escapes the next char within regexes, so skip them both.
576       Str = Str.substr(2);
577       Offset += 2;
578     } else {
579       switch (Str[0]) {
580       default:
581         break;
582       case '[':
583         BracketDepth++;
584         break;
585       case ']':
586         if (BracketDepth == 0) {
587           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
588                           SourceMgr::DK_Error,
589                           "missing closing \"]\" for regex variable");
590           exit(1);
591         }
592         BracketDepth--;
593         break;
594       }
595       Str = Str.substr(1);
596       Offset++;
597     }
598   }
599 
600   return StringRef::npos;
601 }
602 
603 //===----------------------------------------------------------------------===//
604 // Check Strings.
605 //===----------------------------------------------------------------------===//
606 
607 /// CheckString - This is a check that we found in the input file.
608 struct CheckString {
609   /// Pat - The pattern to match.
610   Pattern Pat;
611 
612   /// Prefix - Which prefix name this check matched.
613   StringRef Prefix;
614 
615   /// Loc - The location in the match file that the check string was specified.
616   SMLoc Loc;
617 
618   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
619   /// as opposed to a CHECK: directive.
620   //  Check::CheckType CheckTy;
621 
622   /// DagNotStrings - These are all of the strings that are disallowed from
623   /// occurring between this match string and the previous one (or start of
624   /// file).
625   std::vector<Pattern> DagNotStrings;
626 
627   CheckString(const Pattern &P, StringRef S, SMLoc L)
628       : Pat(P), Prefix(S), Loc(L) {}
629 
630   /// Check - Match check string and its "not strings" and/or "dag strings".
631   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
632                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
633 
634   /// CheckNext - Verify there is a single line in the given buffer.
635   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
636 
637   /// CheckSame - Verify there is no newline in the given buffer.
638   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
639 
640   /// CheckNot - Verify there's no "not strings" in the given buffer.
641   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
642                 const std::vector<const Pattern *> &NotStrings,
643                 StringMap<StringRef> &VariableTable) const;
644 
645   /// CheckDag - Match "dag strings" and their mixed "not strings".
646   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
647                   std::vector<const Pattern *> &NotStrings,
648                   StringMap<StringRef> &VariableTable) const;
649 };
650 
651 /// Canonicalize whitespaces in the file. Line endings are replaced with
652 /// UNIX-style '\n'.
653 ///
654 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
655 /// characters to a single space.
656 static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal,
657                                   SmallVectorImpl<char> &OutputBuffer) {
658   OutputBuffer.reserve(MB.getBufferSize());
659 
660   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
661        Ptr != End; ++Ptr) {
662     // Eliminate trailing dosish \r.
663     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
664       continue;
665     }
666 
667     // If current char is not a horizontal whitespace or if horizontal
668     // whitespace canonicalization is disabled, dump it to output as is.
669     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
670       OutputBuffer.push_back(*Ptr);
671       continue;
672     }
673 
674     // Otherwise, add one space and advance over neighboring space.
675     OutputBuffer.push_back(' ');
676     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
677       ++Ptr;
678   }
679 
680   // Add a null byte and then return all but that byte.
681   OutputBuffer.push_back('\0');
682   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
683 }
684 
685 static bool IsPartOfWord(char c) {
686   return (isalnum(c) || c == '-' || c == '_');
687 }
688 
689 // Get the size of the prefix extension.
690 static size_t CheckTypeSize(Check::CheckType Ty) {
691   switch (Ty) {
692   case Check::CheckNone:
693   case Check::CheckBadNot:
694     return 0;
695 
696   case Check::CheckPlain:
697     return sizeof(":") - 1;
698 
699   case Check::CheckNext:
700     return sizeof("-NEXT:") - 1;
701 
702   case Check::CheckSame:
703     return sizeof("-SAME:") - 1;
704 
705   case Check::CheckNot:
706     return sizeof("-NOT:") - 1;
707 
708   case Check::CheckDAG:
709     return sizeof("-DAG:") - 1;
710 
711   case Check::CheckLabel:
712     return sizeof("-LABEL:") - 1;
713 
714   case Check::CheckEOF:
715     llvm_unreachable("Should not be using EOF size");
716   }
717 
718   llvm_unreachable("Bad check type");
719 }
720 
721 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
722   char NextChar = Buffer[Prefix.size()];
723 
724   // Verify that the : is present after the prefix.
725   if (NextChar == ':')
726     return Check::CheckPlain;
727 
728   if (NextChar != '-')
729     return Check::CheckNone;
730 
731   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
732   if (Rest.startswith("NEXT:"))
733     return Check::CheckNext;
734 
735   if (Rest.startswith("SAME:"))
736     return Check::CheckSame;
737 
738   if (Rest.startswith("NOT:"))
739     return Check::CheckNot;
740 
741   if (Rest.startswith("DAG:"))
742     return Check::CheckDAG;
743 
744   if (Rest.startswith("LABEL:"))
745     return Check::CheckLabel;
746 
747   // You can't combine -NOT with another suffix.
748   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
749       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
750       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
751     return Check::CheckBadNot;
752 
753   return Check::CheckNone;
754 }
755 
756 // From the given position, find the next character after the word.
757 static size_t SkipWord(StringRef Str, size_t Loc) {
758   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
759     ++Loc;
760   return Loc;
761 }
762 
763 // Try to find the first match in buffer for any prefix. If a valid match is
764 // found, return that prefix and set its type and location.  If there are almost
765 // matches (e.g. the actual prefix string is found, but is not an actual check
766 // string), but no valid match, return an empty string and set the position to
767 // resume searching from. If no partial matches are found, return an empty
768 // string and the location will be StringRef::npos. If one prefix is a substring
769 // of another, the maximal match should be found. e.g. if "A" and "AA" are
770 // prefixes then AA-CHECK: should match the second one.
771 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
772                                          Check::CheckType &CheckTy,
773                                          size_t &CheckLoc) {
774   StringRef FirstPrefix;
775   size_t FirstLoc = StringRef::npos;
776   size_t SearchLoc = StringRef::npos;
777   Check::CheckType FirstTy = Check::CheckNone;
778 
779   CheckTy = Check::CheckNone;
780   CheckLoc = StringRef::npos;
781 
782   for (StringRef Prefix : CheckPrefixes) {
783     size_t PrefixLoc = Buffer.find(Prefix);
784 
785     if (PrefixLoc == StringRef::npos)
786       continue;
787 
788     // Track where we are searching for invalid prefixes that look almost right.
789     // We need to only advance to the first partial match on the next attempt
790     // since a partial match could be a substring of a later, valid prefix.
791     // Need to skip to the end of the word, otherwise we could end up
792     // matching a prefix in a substring later.
793     if (PrefixLoc < SearchLoc)
794       SearchLoc = SkipWord(Buffer, PrefixLoc);
795 
796     // We only want to find the first match to avoid skipping some.
797     if (PrefixLoc > FirstLoc)
798       continue;
799     // If one matching check-prefix is a prefix of another, choose the
800     // longer one.
801     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
802       continue;
803 
804     StringRef Rest = Buffer.drop_front(PrefixLoc);
805     // Make sure we have actually found the prefix, and not a word containing
806     // it. This should also prevent matching the wrong prefix when one is a
807     // substring of another.
808     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
809       FirstTy = Check::CheckNone;
810     else
811       FirstTy = FindCheckType(Rest, Prefix);
812 
813     FirstLoc = PrefixLoc;
814     FirstPrefix = Prefix;
815   }
816 
817   // If the first prefix is invalid, we should continue the search after it.
818   if (FirstTy == Check::CheckNone) {
819     CheckLoc = SearchLoc;
820     return "";
821   }
822 
823   CheckTy = FirstTy;
824   CheckLoc = FirstLoc;
825   return FirstPrefix;
826 }
827 
828 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
829                                          unsigned &LineNumber,
830                                          Check::CheckType &CheckTy,
831                                          size_t &CheckLoc) {
832   while (!Buffer.empty()) {
833     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
834     // If we found a real match, we are done.
835     if (!Prefix.empty()) {
836       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
837       return Prefix;
838     }
839 
840     // We didn't find any almost matches either, we are also done.
841     if (CheckLoc == StringRef::npos)
842       return StringRef();
843 
844     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
845 
846     // Advance to the last possible match we found and try again.
847     Buffer = Buffer.drop_front(CheckLoc + 1);
848   }
849 
850   return StringRef();
851 }
852 
853 /// ReadCheckFile - Read the check file, which specifies the sequence of
854 /// expected strings.  The strings are added to the CheckStrings vector.
855 /// Returns true in case of an error, false otherwise.
856 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer,
857                           std::vector<CheckString> &CheckStrings) {
858   std::vector<Pattern> ImplicitNegativeChecks;
859   for (const auto &PatternString : ImplicitCheckNot) {
860     // Create a buffer with fake command line content in order to display the
861     // command line option responsible for the specific implicit CHECK-NOT.
862     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
863     std::string Suffix = "'";
864     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
865         Prefix + PatternString + Suffix, "command line");
866 
867     StringRef PatternInBuffer =
868         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
869     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
870 
871     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
872     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
873                                                "IMPLICIT-CHECK", SM, 0);
874   }
875 
876   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
877 
878   // LineNumber keeps track of the line on which CheckPrefix instances are
879   // found.
880   unsigned LineNumber = 1;
881 
882   while (1) {
883     Check::CheckType CheckTy;
884     size_t PrefixLoc;
885 
886     // See if a prefix occurs in the memory buffer.
887     StringRef UsedPrefix =
888         FindFirstMatchingPrefix(Buffer, LineNumber, CheckTy, PrefixLoc);
889     if (UsedPrefix.empty())
890       break;
891 
892     Buffer = Buffer.drop_front(PrefixLoc);
893 
894     // Location to use for error messages.
895     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
896 
897     // PrefixLoc is to the start of the prefix. Skip to the end.
898     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
899 
900     // Complain about useful-looking but unsupported suffixes.
901     if (CheckTy == Check::CheckBadNot) {
902       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
903                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
904       return true;
905     }
906 
907     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
908     // leading and trailing whitespace.
909     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
910 
911     // Scan ahead to the end of line.
912     size_t EOL = Buffer.find_first_of("\n\r");
913 
914     // Remember the location of the start of the pattern, for diagnostics.
915     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
916 
917     // Parse the pattern.
918     Pattern P(CheckTy);
919     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
920       return true;
921 
922     // Verify that CHECK-LABEL lines do not define or use variables
923     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
924       SM.PrintMessage(
925           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
926           "found '" + UsedPrefix + "-LABEL:'"
927                                    " with variable definition or use");
928       return true;
929     }
930 
931     Buffer = Buffer.substr(EOL);
932 
933     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
934     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
935         CheckStrings.empty()) {
936       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
937       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
938                       SourceMgr::DK_Error,
939                       "found '" + UsedPrefix + "-" + Type +
940                           "' without previous '" + UsedPrefix + ": line");
941       return true;
942     }
943 
944     // Handle CHECK-DAG/-NOT.
945     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
946       DagNotMatches.push_back(P);
947       continue;
948     }
949 
950     // Okay, add the string we captured to the output vector and move on.
951     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
952     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
953     DagNotMatches = ImplicitNegativeChecks;
954   }
955 
956   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
957   // prefix as a filler for the error message.
958   if (!DagNotMatches.empty()) {
959     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
960                               SMLoc::getFromPointer(Buffer.data()));
961     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
962   }
963 
964   if (CheckStrings.empty()) {
965     errs() << "error: no check strings found with prefix"
966            << (CheckPrefixes.size() > 1 ? "es " : " ");
967     prefix_iterator I = CheckPrefixes.begin();
968     prefix_iterator E = CheckPrefixes.end();
969     if (I != E) {
970       errs() << "\'" << *I << ":'";
971       ++I;
972     }
973     for (; I != E; ++I)
974       errs() << ", \'" << *I << ":'";
975 
976     errs() << '\n';
977     return true;
978   }
979 
980   return false;
981 }
982 
983 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
984                              StringRef Buffer,
985                              StringMap<StringRef> &VariableTable) {
986   // Otherwise, we have an error, emit an error message.
987   SM.PrintMessage(Loc, SourceMgr::DK_Error,
988                   "expected string not found in input");
989 
990   // Print the "scanning from here" line.  If the current position is at the
991   // end of a line, advance to the start of the next line.
992   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
993 
994   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
995                   "scanning from here");
996 
997   // Allow the pattern to print additional information if desired.
998   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
999 }
1000 
1001 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1002                              StringRef Buffer,
1003                              StringMap<StringRef> &VariableTable) {
1004   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1005 }
1006 
1007 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1008 /// range.
1009 static unsigned CountNumNewlinesBetween(StringRef Range,
1010                                         const char *&FirstNewLine) {
1011   unsigned NumNewLines = 0;
1012   while (1) {
1013     // Scan for newline.
1014     Range = Range.substr(Range.find_first_of("\n\r"));
1015     if (Range.empty())
1016       return NumNewLines;
1017 
1018     ++NumNewLines;
1019 
1020     // Handle \n\r and \r\n as a single newline.
1021     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1022         (Range[0] != Range[1]))
1023       Range = Range.substr(1);
1024     Range = Range.substr(1);
1025 
1026     if (NumNewLines == 1)
1027       FirstNewLine = Range.begin();
1028   }
1029 }
1030 
1031 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1032                           bool IsLabelScanMode, size_t &MatchLen,
1033                           StringMap<StringRef> &VariableTable) const {
1034   size_t LastPos = 0;
1035   std::vector<const Pattern *> NotStrings;
1036 
1037   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1038   // bounds; we have not processed variable definitions within the bounded block
1039   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1040   // over the block again (including the last CHECK-LABEL) in normal mode.
1041   if (!IsLabelScanMode) {
1042     // Match "dag strings" (with mixed "not strings" if any).
1043     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1044     if (LastPos == StringRef::npos)
1045       return StringRef::npos;
1046   }
1047 
1048   // Match itself from the last position after matching CHECK-DAG.
1049   StringRef MatchBuffer = Buffer.substr(LastPos);
1050   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1051   if (MatchPos == StringRef::npos) {
1052     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1053     return StringRef::npos;
1054   }
1055 
1056   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1057   // or CHECK-NOT
1058   if (!IsLabelScanMode) {
1059     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1060 
1061     // If this check is a "CHECK-NEXT", verify that the previous match was on
1062     // the previous line (i.e. that there is one newline between them).
1063     if (CheckNext(SM, SkippedRegion))
1064       return StringRef::npos;
1065 
1066     // If this check is a "CHECK-SAME", verify that the previous match was on
1067     // the same line (i.e. that there is no newline between them).
1068     if (CheckSame(SM, SkippedRegion))
1069       return StringRef::npos;
1070 
1071     // If this match had "not strings", verify that they don't exist in the
1072     // skipped region.
1073     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1074       return StringRef::npos;
1075   }
1076 
1077   return LastPos + MatchPos;
1078 }
1079 
1080 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1081   if (Pat.getCheckTy() != Check::CheckNext)
1082     return false;
1083 
1084   // Count the number of newlines between the previous match and this one.
1085   assert(Buffer.data() !=
1086              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1087                                     SMLoc::getFromPointer(Buffer.data())))
1088                  ->getBufferStart() &&
1089          "CHECK-NEXT can't be the first check in a file");
1090 
1091   const char *FirstNewLine = nullptr;
1092   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1093 
1094   if (NumNewLines == 0) {
1095     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1096                     Prefix + "-NEXT: is on the same line as previous match");
1097     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1098                     "'next' match was here");
1099     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1100                     "previous match ended here");
1101     return true;
1102   }
1103 
1104   if (NumNewLines != 1) {
1105     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1106                     Prefix +
1107                         "-NEXT: is not on the line after the previous match");
1108     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1109                     "'next' match was here");
1110     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1111                     "previous match ended here");
1112     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1113                     "non-matching line after previous match is here");
1114     return true;
1115   }
1116 
1117   return false;
1118 }
1119 
1120 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1121   if (Pat.getCheckTy() != Check::CheckSame)
1122     return false;
1123 
1124   // Count the number of newlines between the previous match and this one.
1125   assert(Buffer.data() !=
1126              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1127                                     SMLoc::getFromPointer(Buffer.data())))
1128                  ->getBufferStart() &&
1129          "CHECK-SAME can't be the first check in a file");
1130 
1131   const char *FirstNewLine = nullptr;
1132   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1133 
1134   if (NumNewLines != 0) {
1135     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1136                     Prefix +
1137                         "-SAME: is not on the same line as the previous match");
1138     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1139                     "'next' match was here");
1140     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1141                     "previous match ended here");
1142     return true;
1143   }
1144 
1145   return false;
1146 }
1147 
1148 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1149                            const std::vector<const Pattern *> &NotStrings,
1150                            StringMap<StringRef> &VariableTable) const {
1151   for (const Pattern *Pat : NotStrings) {
1152     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1153 
1154     size_t MatchLen = 0;
1155     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1156 
1157     if (Pos == StringRef::npos)
1158       continue;
1159 
1160     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1161                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1162     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1163                     Prefix + "-NOT: pattern specified here");
1164     return true;
1165   }
1166 
1167   return false;
1168 }
1169 
1170 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1171                              std::vector<const Pattern *> &NotStrings,
1172                              StringMap<StringRef> &VariableTable) const {
1173   if (DagNotStrings.empty())
1174     return 0;
1175 
1176   size_t LastPos = 0;
1177   size_t StartPos = LastPos;
1178 
1179   for (const Pattern &Pat : DagNotStrings) {
1180     assert((Pat.getCheckTy() == Check::CheckDAG ||
1181             Pat.getCheckTy() == Check::CheckNot) &&
1182            "Invalid CHECK-DAG or CHECK-NOT!");
1183 
1184     if (Pat.getCheckTy() == Check::CheckNot) {
1185       NotStrings.push_back(&Pat);
1186       continue;
1187     }
1188 
1189     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1190 
1191     size_t MatchLen = 0, MatchPos;
1192 
1193     // CHECK-DAG always matches from the start.
1194     StringRef MatchBuffer = Buffer.substr(StartPos);
1195     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1196     // With a group of CHECK-DAGs, a single mismatching means the match on
1197     // that group of CHECK-DAGs fails immediately.
1198     if (MatchPos == StringRef::npos) {
1199       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1200       return StringRef::npos;
1201     }
1202     // Re-calc it as the offset relative to the start of the original string.
1203     MatchPos += StartPos;
1204 
1205     if (!NotStrings.empty()) {
1206       if (MatchPos < LastPos) {
1207         // Reordered?
1208         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1209                         SourceMgr::DK_Error,
1210                         Prefix + "-DAG: found a match of CHECK-DAG"
1211                                  " reordering across a CHECK-NOT");
1212         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1213                         SourceMgr::DK_Note,
1214                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1215                                  " is found here");
1216         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1217                         Prefix + "-NOT: the crossed pattern specified"
1218                                  " here");
1219         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1220                         Prefix + "-DAG: the reordered pattern specified"
1221                                  " here");
1222         return StringRef::npos;
1223       }
1224       // All subsequent CHECK-DAGs should be matched from the farthest
1225       // position of all precedent CHECK-DAGs (including this one.)
1226       StartPos = LastPos;
1227       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1228       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1229       // region.
1230       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1231       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1232         return StringRef::npos;
1233       // Clear "not strings".
1234       NotStrings.clear();
1235     }
1236 
1237     // Update the last position with CHECK-DAG matches.
1238     LastPos = std::max(MatchPos + MatchLen, LastPos);
1239   }
1240 
1241   return LastPos;
1242 }
1243 
1244 // A check prefix must contain only alphanumeric, hyphens and underscores.
1245 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1246   Regex Validator("^[a-zA-Z0-9_-]*$");
1247   return Validator.match(CheckPrefix);
1248 }
1249 
1250 static bool ValidateCheckPrefixes() {
1251   StringSet<> PrefixSet;
1252 
1253   for (StringRef Prefix : CheckPrefixes) {
1254     // Reject empty prefixes.
1255     if (Prefix == "")
1256       return false;
1257 
1258     if (!PrefixSet.insert(Prefix).second)
1259       return false;
1260 
1261     if (!ValidateCheckPrefix(Prefix))
1262       return false;
1263   }
1264 
1265   return true;
1266 }
1267 
1268 // I don't think there's a way to specify an initial value for cl::list,
1269 // so if nothing was specified, add the default
1270 static void AddCheckPrefixIfNeeded() {
1271   if (CheckPrefixes.empty())
1272     CheckPrefixes.push_back("CHECK");
1273 }
1274 
1275 static void DumpCommandLine(int argc, char **argv) {
1276   errs() << "FileCheck command line: ";
1277   for (int I = 0; I < argc; I++)
1278     errs() << " " << argv[I];
1279   errs() << "\n";
1280 }
1281 
1282 /// Check the input to FileCheck provided in the \p Buffer against the \p
1283 /// CheckStrings read from the check file.
1284 ///
1285 /// Returns false if the input fails to satisfy the checks.
1286 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1287                 ArrayRef<CheckString> CheckStrings) {
1288   bool ChecksFailed = false;
1289 
1290   /// VariableTable - This holds all the current filecheck variables.
1291   StringMap<StringRef> VariableTable;
1292 
1293   unsigned i = 0, j = 0, e = CheckStrings.size();
1294   while (true) {
1295     StringRef CheckRegion;
1296     if (j == e) {
1297       CheckRegion = Buffer;
1298     } else {
1299       const CheckString &CheckLabelStr = CheckStrings[j];
1300       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1301         ++j;
1302         continue;
1303       }
1304 
1305       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1306       size_t MatchLabelLen = 0;
1307       size_t MatchLabelPos =
1308           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1309       if (MatchLabelPos == StringRef::npos)
1310         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1311         return false;
1312 
1313       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1314       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1315       ++j;
1316     }
1317 
1318     for (; i != j; ++i) {
1319       const CheckString &CheckStr = CheckStrings[i];
1320 
1321       // Check each string within the scanned region, including a second check
1322       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1323       size_t MatchLen = 0;
1324       size_t MatchPos =
1325           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1326 
1327       if (MatchPos == StringRef::npos) {
1328         ChecksFailed = true;
1329         i = j;
1330         break;
1331       }
1332 
1333       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1334     }
1335 
1336     if (j == e)
1337       break;
1338   }
1339 
1340   // Success if no checks failed.
1341   return !ChecksFailed;
1342 }
1343 
1344 int main(int argc, char **argv) {
1345   sys::PrintStackTraceOnErrorSignal(argv[0]);
1346   PrettyStackTraceProgram X(argc, argv);
1347   cl::ParseCommandLineOptions(argc, argv);
1348 
1349   if (!ValidateCheckPrefixes()) {
1350     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1351               "start with a letter and contain only alphanumeric characters, "
1352               "hyphens and underscores\n";
1353     return 2;
1354   }
1355 
1356   AddCheckPrefixIfNeeded();
1357 
1358   SourceMgr SM;
1359 
1360   // Read the expected strings from the check file.
1361   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1362       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1363   if (std::error_code EC = CheckFileOrErr.getError()) {
1364     errs() << "Could not open check file '" << CheckFilename
1365            << "': " << EC.message() << '\n';
1366     return 2;
1367   }
1368   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1369 
1370   SmallString<4096> CheckFileBuffer;
1371   StringRef CheckFileText =
1372       CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer);
1373 
1374   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1375                             CheckFileText, CheckFile.getBufferIdentifier()),
1376                         SMLoc());
1377 
1378   std::vector<CheckString> CheckStrings;
1379   if (ReadCheckFile(SM, CheckFileText, CheckStrings))
1380     return 2;
1381 
1382   // Open the file to check and add it to SourceMgr.
1383   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1384       MemoryBuffer::getFileOrSTDIN(InputFilename);
1385   if (std::error_code EC = InputFileOrErr.getError()) {
1386     errs() << "Could not open input file '" << InputFilename
1387            << "': " << EC.message() << '\n';
1388     return 2;
1389   }
1390   MemoryBuffer &InputFile = *InputFileOrErr.get();
1391 
1392   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1393     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1394     DumpCommandLine(argc, argv);
1395     return 2;
1396   }
1397 
1398   SmallString<4096> InputFileBuffer;
1399   StringRef InputFileText =
1400       CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer);
1401 
1402   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1403                             InputFileText, InputFile.getBufferIdentifier()),
1404                         SMLoc());
1405 
1406   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1407 }
1408