xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision d1e020f7eeca13787baa9f60d24248ca34c2ae4f)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43               cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool>
55 NoCanonicalizeWhiteSpace("strict-whitespace",
56               cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::opt<bool> AllowEmptyInput(
66     "allow-empty", cl::init(false),
67     cl::desc("Allow the input file to be empty. This is useful when making\n"
68              "checks that some error message does not occur, for example."));
69 
70 static cl::opt<bool> MatchFullLines(
71     "match-full-lines", cl::init(false),
72     cl::desc("Require all positive matches to cover an entire input line.\n"
73              "Allows leading and trailing whitespace if --strict-whitespace\n"
74              "is not also passed."));
75 
76 typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
82 namespace Check {
83   enum CheckType {
84     CheckNone = 0,
85     CheckPlain,
86     CheckNext,
87     CheckSame,
88     CheckNot,
89     CheckDAG,
90     CheckLabel,
91 
92     /// MatchEOF - When set, this pattern only matches the end of file. This is
93     /// used for trailing CHECK-NOTs.
94     CheckEOF,
95     /// CheckBadNot - Found -NOT combined with another CHECK suffix.
96     CheckBadNot
97   };
98 }
99 
100 class Pattern {
101   SMLoc PatternLoc;
102 
103   /// FixedStr - If non-empty, this pattern is a fixed string match with the
104   /// specified fixed string.
105   StringRef FixedStr;
106 
107   /// RegEx - If non-empty, this is a regex pattern.
108   std::string RegExStr;
109 
110   /// VariableUses - Entries in this vector map to uses of a variable in the
111   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
112   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
113   /// value of bar at offset 3.
114   std::vector<std::pair<StringRef, unsigned> > VariableUses;
115 
116   /// VariableDefs - Maps definitions of variables to their parenthesized
117   /// capture numbers.
118   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
119   std::map<StringRef, unsigned> VariableDefs;
120 
121   Check::CheckType CheckTy;
122 
123   /// \brief Contains the number of line this pattern is in.
124   unsigned LineNumber;
125 
126 public:
127   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
128 
129   /// getLoc - Return the location in source code.
130   SMLoc getLoc() const { return PatternLoc; }
131 
132   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
133   /// which prefix is being matched, SM provides the SourceMgr used for error
134   /// reports, and LineNumber is the line number in the input file from which
135   /// the pattern string was read.  Returns true in case of an error, false
136   /// otherwise.
137   bool ParsePattern(StringRef PatternStr,
138                     StringRef Prefix,
139                     SourceMgr &SM,
140                     unsigned LineNumber);
141 
142   /// Match - Match the pattern string against the input buffer Buffer.  This
143   /// returns the position that is matched or npos if there is no match.  If
144   /// there is a match, the size of the matched string is returned in MatchLen.
145   ///
146   /// The VariableTable StringMap provides the current values of filecheck
147   /// variables and is updated if this match defines new values.
148   size_t Match(StringRef Buffer, size_t &MatchLen,
149                StringMap<StringRef> &VariableTable) const;
150 
151   /// PrintFailureInfo - Print additional information about a failure to match
152   /// involving this pattern.
153   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
154                         const StringMap<StringRef> &VariableTable) const;
155 
156   bool hasVariable() const { return !(VariableUses.empty() &&
157                                       VariableDefs.empty()); }
158 
159   Check::CheckType getCheckTy() const { return CheckTy; }
160 
161 private:
162   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
163   void AddBackrefToRegEx(unsigned BackrefNum);
164 
165   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
166   /// matching this pattern at the start of \arg Buffer; a distance of zero
167   /// should correspond to a perfect match.
168   unsigned ComputeMatchDistance(StringRef Buffer,
169                                const StringMap<StringRef> &VariableTable) const;
170 
171   /// \brief Evaluates expression and stores the result to \p Value.
172   /// \return true on success. false when the expression has invalid syntax.
173   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
174 
175   /// \brief Finds the closing sequence of a regex variable usage or
176   /// definition. Str has to point in the beginning of the definition
177   /// (right after the opening sequence).
178   /// \return offset of the closing sequence within Str, or npos if it was not
179   /// found.
180   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
181 };
182 
183 
184 bool Pattern::ParsePattern(StringRef PatternStr,
185                            StringRef Prefix,
186                            SourceMgr &SM,
187                            unsigned LineNumber) {
188   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
189 
190   this->LineNumber = LineNumber;
191   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
192 
193   // Ignore trailing whitespace.
194   while (!PatternStr.empty() &&
195          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
196     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
197 
198   // Check that there is something on the line.
199   if (PatternStr.empty()) {
200     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
201                     "found empty check string with prefix '" +
202                     Prefix + ":'");
203     return true;
204   }
205 
206   // Check to see if this is a fixed string, or if it has regex pieces.
207   if (!MatchFullLinesHere &&
208       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
209                                  PatternStr.find("[[") == StringRef::npos))) {
210     FixedStr = PatternStr;
211     return false;
212   }
213 
214   if (MatchFullLinesHere) {
215     RegExStr += '^';
216     if (!NoCanonicalizeWhiteSpace)
217       RegExStr += " *";
218   }
219 
220   // Paren value #0 is for the fully matched string.  Any new parenthesized
221   // values add from there.
222   unsigned CurParen = 1;
223 
224   // Otherwise, there is at least one regex piece.  Build up the regex pattern
225   // by escaping scary characters in fixed strings, building up one big regex.
226   while (!PatternStr.empty()) {
227     // RegEx matches.
228     if (PatternStr.startswith("{{")) {
229       // This is the start of a regex match.  Scan for the }}.
230       size_t End = PatternStr.find("}}");
231       if (End == StringRef::npos) {
232         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
233                         SourceMgr::DK_Error,
234                         "found start of regex string with no end '}}'");
235         return true;
236       }
237 
238       // Enclose {{}} patterns in parens just like [[]] even though we're not
239       // capturing the result for any purpose.  This is required in case the
240       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
241       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
242       RegExStr += '(';
243       ++CurParen;
244 
245       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
246         return true;
247       RegExStr += ')';
248 
249       PatternStr = PatternStr.substr(End+2);
250       continue;
251     }
252 
253     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
254     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
255     // second form is [[foo]] which is a reference to foo.  The variable name
256     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
257     // it.  This is to catch some common errors.
258     if (PatternStr.startswith("[[")) {
259       // Find the closing bracket pair ending the match.  End is going to be an
260       // offset relative to the beginning of the match string.
261       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
262 
263       if (End == StringRef::npos) {
264         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
265                         SourceMgr::DK_Error,
266                         "invalid named regex reference, no ]] found");
267         return true;
268       }
269 
270       StringRef MatchStr = PatternStr.substr(2, End);
271       PatternStr = PatternStr.substr(End+4);
272 
273       // Get the regex name (e.g. "foo").
274       size_t NameEnd = MatchStr.find(':');
275       StringRef Name = MatchStr.substr(0, NameEnd);
276 
277       if (Name.empty()) {
278         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
279                         "invalid name in named regex: empty name");
280         return true;
281       }
282 
283       // Verify that the name/expression is well formed. FileCheck currently
284       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
285       // is relaxed, more strict check is performed in \c EvaluateExpression.
286       bool IsExpression = false;
287       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
288         if (i == 0 && Name[i] == '@') {
289           if (NameEnd != StringRef::npos) {
290             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
291                             SourceMgr::DK_Error,
292                             "invalid name in named regex definition");
293             return true;
294           }
295           IsExpression = true;
296           continue;
297         }
298         if (Name[i] != '_' && !isalnum(Name[i]) &&
299             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
300           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
301                           SourceMgr::DK_Error, "invalid name in named regex");
302           return true;
303         }
304       }
305 
306       // Name can't start with a digit.
307       if (isdigit(static_cast<unsigned char>(Name[0]))) {
308         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
309                         "invalid name in named regex");
310         return true;
311       }
312 
313       // Handle [[foo]].
314       if (NameEnd == StringRef::npos) {
315         // Handle variables that were defined earlier on the same line by
316         // emitting a backreference.
317         if (VariableDefs.find(Name) != VariableDefs.end()) {
318           unsigned VarParenNum = VariableDefs[Name];
319           if (VarParenNum < 1 || VarParenNum > 9) {
320             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
321                             SourceMgr::DK_Error,
322                             "Can't back-reference more than 9 variables");
323             return true;
324           }
325           AddBackrefToRegEx(VarParenNum);
326         } else {
327           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
328         }
329         continue;
330       }
331 
332       // Handle [[foo:.*]].
333       VariableDefs[Name] = CurParen;
334       RegExStr += '(';
335       ++CurParen;
336 
337       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
338         return true;
339 
340       RegExStr += ')';
341     }
342 
343     // Handle fixed string matches.
344     // Find the end, which is the start of the next regex.
345     size_t FixedMatchEnd = PatternStr.find("{{");
346     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
347     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
348     PatternStr = PatternStr.substr(FixedMatchEnd);
349   }
350 
351   if (MatchFullLinesHere) {
352     if (!NoCanonicalizeWhiteSpace)
353       RegExStr += " *";
354     RegExStr += '$';
355   }
356 
357   return false;
358 }
359 
360 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
361                               SourceMgr &SM) {
362   Regex R(RS);
363   std::string Error;
364   if (!R.isValid(Error)) {
365     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
366                     "invalid regex: " + Error);
367     return true;
368   }
369 
370   RegExStr += RS.str();
371   CurParen += R.getNumMatches();
372   return false;
373 }
374 
375 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
376   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
377   std::string Backref = std::string("\\") +
378                         std::string(1, '0' + BackrefNum);
379   RegExStr += Backref;
380 }
381 
382 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
383   // The only supported expression is @LINE([\+-]\d+)?
384   if (!Expr.startswith("@LINE"))
385     return false;
386   Expr = Expr.substr(StringRef("@LINE").size());
387   int Offset = 0;
388   if (!Expr.empty()) {
389     if (Expr[0] == '+')
390       Expr = Expr.substr(1);
391     else if (Expr[0] != '-')
392       return false;
393     if (Expr.getAsInteger(10, Offset))
394       return false;
395   }
396   Value = llvm::itostr(LineNumber + Offset);
397   return true;
398 }
399 
400 /// Match - Match the pattern string against the input buffer Buffer.  This
401 /// returns the position that is matched or npos if there is no match.  If
402 /// there is a match, the size of the matched string is returned in MatchLen.
403 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
404                       StringMap<StringRef> &VariableTable) const {
405   // If this is the EOF pattern, match it immediately.
406   if (CheckTy == Check::CheckEOF) {
407     MatchLen = 0;
408     return Buffer.size();
409   }
410 
411   // If this is a fixed string pattern, just match it now.
412   if (!FixedStr.empty()) {
413     MatchLen = FixedStr.size();
414     return Buffer.find(FixedStr);
415   }
416 
417   // Regex match.
418 
419   // If there are variable uses, we need to create a temporary string with the
420   // actual value.
421   StringRef RegExToMatch = RegExStr;
422   std::string TmpStr;
423   if (!VariableUses.empty()) {
424     TmpStr = RegExStr;
425 
426     unsigned InsertOffset = 0;
427     for (const auto &VariableUse : VariableUses) {
428       std::string Value;
429 
430       if (VariableUse.first[0] == '@') {
431         if (!EvaluateExpression(VariableUse.first, Value))
432           return StringRef::npos;
433       } else {
434         StringMap<StringRef>::iterator it =
435             VariableTable.find(VariableUse.first);
436         // If the variable is undefined, return an error.
437         if (it == VariableTable.end())
438           return StringRef::npos;
439 
440         // Look up the value and escape it so that we can put it into the regex.
441         Value += Regex::escape(it->second);
442       }
443 
444       // Plop it into the regex at the adjusted offset.
445       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
446                     Value.begin(), Value.end());
447       InsertOffset += Value.size();
448     }
449 
450     // Match the newly constructed regex.
451     RegExToMatch = TmpStr;
452   }
453 
454 
455   SmallVector<StringRef, 4> MatchInfo;
456   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
457     return StringRef::npos;
458 
459   // Successful regex match.
460   assert(!MatchInfo.empty() && "Didn't get any match");
461   StringRef FullMatch = MatchInfo[0];
462 
463   // If this defines any variables, remember their values.
464   for (const auto &VariableDef : VariableDefs) {
465     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
466     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
467   }
468 
469   MatchLen = FullMatch.size();
470   return FullMatch.data()-Buffer.data();
471 }
472 
473 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
474                               const StringMap<StringRef> &VariableTable) const {
475   // Just compute the number of matching characters. For regular expressions, we
476   // just compare against the regex itself and hope for the best.
477   //
478   // FIXME: One easy improvement here is have the regex lib generate a single
479   // example regular expression which matches, and use that as the example
480   // string.
481   StringRef ExampleString(FixedStr);
482   if (ExampleString.empty())
483     ExampleString = RegExStr;
484 
485   // Only compare up to the first line in the buffer, or the string size.
486   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
487   BufferPrefix = BufferPrefix.split('\n').first;
488   return BufferPrefix.edit_distance(ExampleString);
489 }
490 
491 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
492                                const StringMap<StringRef> &VariableTable) const{
493   // If this was a regular expression using variables, print the current
494   // variable values.
495   if (!VariableUses.empty()) {
496     for (const auto &VariableUse : VariableUses) {
497       SmallString<256> Msg;
498       raw_svector_ostream OS(Msg);
499       StringRef Var = VariableUse.first;
500       if (Var[0] == '@') {
501         std::string Value;
502         if (EvaluateExpression(Var, Value)) {
503           OS << "with expression \"";
504           OS.write_escaped(Var) << "\" equal to \"";
505           OS.write_escaped(Value) << "\"";
506         } else {
507           OS << "uses incorrect expression \"";
508           OS.write_escaped(Var) << "\"";
509         }
510       } else {
511         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
512 
513         // Check for undefined variable references.
514         if (it == VariableTable.end()) {
515           OS << "uses undefined variable \"";
516           OS.write_escaped(Var) << "\"";
517         } else {
518           OS << "with variable \"";
519           OS.write_escaped(Var) << "\" equal to \"";
520           OS.write_escaped(it->second) << "\"";
521         }
522       }
523 
524       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
525                       OS.str());
526     }
527   }
528 
529   // Attempt to find the closest/best fuzzy match.  Usually an error happens
530   // because some string in the output didn't exactly match. In these cases, we
531   // would like to show the user a best guess at what "should have" matched, to
532   // save them having to actually check the input manually.
533   size_t NumLinesForward = 0;
534   size_t Best = StringRef::npos;
535   double BestQuality = 0;
536 
537   // Use an arbitrary 4k limit on how far we will search.
538   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
539     if (Buffer[i] == '\n')
540       ++NumLinesForward;
541 
542     // Patterns have leading whitespace stripped, so skip whitespace when
543     // looking for something which looks like a pattern.
544     if (Buffer[i] == ' ' || Buffer[i] == '\t')
545       continue;
546 
547     // Compute the "quality" of this match as an arbitrary combination of the
548     // match distance and the number of lines skipped to get to this match.
549     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
550     double Quality = Distance + (NumLinesForward / 100.);
551 
552     if (Quality < BestQuality || Best == StringRef::npos) {
553       Best = i;
554       BestQuality = Quality;
555     }
556   }
557 
558   // Print the "possible intended match here" line if we found something
559   // reasonable and not equal to what we showed in the "scanning from here"
560   // line.
561   if (Best && Best != StringRef::npos && BestQuality < 50) {
562       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
563                       SourceMgr::DK_Note, "possible intended match here");
564 
565     // FIXME: If we wanted to be really friendly we would show why the match
566     // failed, as it can be hard to spot simple one character differences.
567   }
568 }
569 
570 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
571   // Offset keeps track of the current offset within the input Str
572   size_t Offset = 0;
573   // [...] Nesting depth
574   size_t BracketDepth = 0;
575 
576   while (!Str.empty()) {
577     if (Str.startswith("]]") && BracketDepth == 0)
578       return Offset;
579     if (Str[0] == '\\') {
580       // Backslash escapes the next char within regexes, so skip them both.
581       Str = Str.substr(2);
582       Offset += 2;
583     } else {
584       switch (Str[0]) {
585         default:
586           break;
587         case '[':
588           BracketDepth++;
589           break;
590         case ']':
591           if (BracketDepth == 0) {
592             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
593                             SourceMgr::DK_Error,
594                             "missing closing \"]\" for regex variable");
595             exit(1);
596           }
597           BracketDepth--;
598           break;
599       }
600       Str = Str.substr(1);
601       Offset++;
602     }
603   }
604 
605   return StringRef::npos;
606 }
607 
608 
609 //===----------------------------------------------------------------------===//
610 // Check Strings.
611 //===----------------------------------------------------------------------===//
612 
613 /// CheckString - This is a check that we found in the input file.
614 struct CheckString {
615   /// Pat - The pattern to match.
616   Pattern Pat;
617 
618   /// Prefix - Which prefix name this check matched.
619   StringRef Prefix;
620 
621   /// Loc - The location in the match file that the check string was specified.
622   SMLoc Loc;
623 
624   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
625   /// as opposed to a CHECK: directive.
626   //  Check::CheckType CheckTy;
627 
628   /// DagNotStrings - These are all of the strings that are disallowed from
629   /// occurring between this match string and the previous one (or start of
630   /// file).
631   std::vector<Pattern> DagNotStrings;
632 
633   CheckString(const Pattern &P, StringRef S, SMLoc L)
634       : Pat(P), Prefix(S), Loc(L) {}
635 
636   /// Check - Match check string and its "not strings" and/or "dag strings".
637   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
638                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
639 
640   /// CheckNext - Verify there is a single line in the given buffer.
641   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
642 
643   /// CheckSame - Verify there is no newline in the given buffer.
644   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
645 
646   /// CheckNot - Verify there's no "not strings" in the given buffer.
647   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
648                 const std::vector<const Pattern *> &NotStrings,
649                 StringMap<StringRef> &VariableTable) const;
650 
651   /// CheckDag - Match "dag strings" and their mixed "not strings".
652   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
653                   std::vector<const Pattern *> &NotStrings,
654                   StringMap<StringRef> &VariableTable) const;
655 };
656 
657 /// Canonicalize whitespaces in the input file. Line endings are replaced
658 /// with UNIX-style '\n'.
659 ///
660 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
661 /// characters to a single space.
662 static std::unique_ptr<MemoryBuffer>
663 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
664                       bool PreserveHorizontal) {
665   SmallString<128> NewFile;
666   NewFile.reserve(MB->getBufferSize());
667 
668   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
669        Ptr != End; ++Ptr) {
670     // Eliminate trailing dosish \r.
671     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
672       continue;
673     }
674 
675     // If current char is not a horizontal whitespace or if horizontal
676     // whitespace canonicalization is disabled, dump it to output as is.
677     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
678       NewFile.push_back(*Ptr);
679       continue;
680     }
681 
682     // Otherwise, add one space and advance over neighboring space.
683     NewFile.push_back(' ');
684     while (Ptr+1 != End &&
685            (Ptr[1] == ' ' || Ptr[1] == '\t'))
686       ++Ptr;
687   }
688 
689   return std::unique_ptr<MemoryBuffer>(
690       MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
691 }
692 
693 static bool IsPartOfWord(char c) {
694   return (isalnum(c) || c == '-' || c == '_');
695 }
696 
697 // Get the size of the prefix extension.
698 static size_t CheckTypeSize(Check::CheckType Ty) {
699   switch (Ty) {
700   case Check::CheckNone:
701   case Check::CheckBadNot:
702     return 0;
703 
704   case Check::CheckPlain:
705     return sizeof(":") - 1;
706 
707   case Check::CheckNext:
708     return sizeof("-NEXT:") - 1;
709 
710   case Check::CheckSame:
711     return sizeof("-SAME:") - 1;
712 
713   case Check::CheckNot:
714     return sizeof("-NOT:") - 1;
715 
716   case Check::CheckDAG:
717     return sizeof("-DAG:") - 1;
718 
719   case Check::CheckLabel:
720     return sizeof("-LABEL:") - 1;
721 
722   case Check::CheckEOF:
723     llvm_unreachable("Should not be using EOF size");
724   }
725 
726   llvm_unreachable("Bad check type");
727 }
728 
729 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
730   char NextChar = Buffer[Prefix.size()];
731 
732   // Verify that the : is present after the prefix.
733   if (NextChar == ':')
734     return Check::CheckPlain;
735 
736   if (NextChar != '-')
737     return Check::CheckNone;
738 
739   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
740   if (Rest.startswith("NEXT:"))
741     return Check::CheckNext;
742 
743   if (Rest.startswith("SAME:"))
744     return Check::CheckSame;
745 
746   if (Rest.startswith("NOT:"))
747     return Check::CheckNot;
748 
749   if (Rest.startswith("DAG:"))
750     return Check::CheckDAG;
751 
752   if (Rest.startswith("LABEL:"))
753     return Check::CheckLabel;
754 
755   // You can't combine -NOT with another suffix.
756   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
757       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
758       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
759     return Check::CheckBadNot;
760 
761   return Check::CheckNone;
762 }
763 
764 // From the given position, find the next character after the word.
765 static size_t SkipWord(StringRef Str, size_t Loc) {
766   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
767     ++Loc;
768   return Loc;
769 }
770 
771 // Try to find the first match in buffer for any prefix. If a valid match is
772 // found, return that prefix and set its type and location.  If there are almost
773 // matches (e.g. the actual prefix string is found, but is not an actual check
774 // string), but no valid match, return an empty string and set the position to
775 // resume searching from. If no partial matches are found, return an empty
776 // string and the location will be StringRef::npos. If one prefix is a substring
777 // of another, the maximal match should be found. e.g. if "A" and "AA" are
778 // prefixes then AA-CHECK: should match the second one.
779 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
780                                          Check::CheckType &CheckTy,
781                                          size_t &CheckLoc) {
782   StringRef FirstPrefix;
783   size_t FirstLoc = StringRef::npos;
784   size_t SearchLoc = StringRef::npos;
785   Check::CheckType FirstTy = Check::CheckNone;
786 
787   CheckTy = Check::CheckNone;
788   CheckLoc = StringRef::npos;
789 
790   for (StringRef Prefix : CheckPrefixes) {
791     size_t PrefixLoc = Buffer.find(Prefix);
792 
793     if (PrefixLoc == StringRef::npos)
794       continue;
795 
796     // Track where we are searching for invalid prefixes that look almost right.
797     // We need to only advance to the first partial match on the next attempt
798     // since a partial match could be a substring of a later, valid prefix.
799     // Need to skip to the end of the word, otherwise we could end up
800     // matching a prefix in a substring later.
801     if (PrefixLoc < SearchLoc)
802       SearchLoc = SkipWord(Buffer, PrefixLoc);
803 
804     // We only want to find the first match to avoid skipping some.
805     if (PrefixLoc > FirstLoc)
806       continue;
807     // If one matching check-prefix is a prefix of another, choose the
808     // longer one.
809     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
810       continue;
811 
812     StringRef Rest = Buffer.drop_front(PrefixLoc);
813     // Make sure we have actually found the prefix, and not a word containing
814     // it. This should also prevent matching the wrong prefix when one is a
815     // substring of another.
816     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
817       FirstTy = Check::CheckNone;
818     else
819       FirstTy = FindCheckType(Rest, Prefix);
820 
821     FirstLoc = PrefixLoc;
822     FirstPrefix = Prefix;
823   }
824 
825   // If the first prefix is invalid, we should continue the search after it.
826   if (FirstTy == Check::CheckNone) {
827     CheckLoc = SearchLoc;
828     return "";
829   }
830 
831   CheckTy = FirstTy;
832   CheckLoc = FirstLoc;
833   return FirstPrefix;
834 }
835 
836 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
837                                          unsigned &LineNumber,
838                                          Check::CheckType &CheckTy,
839                                          size_t &CheckLoc) {
840   while (!Buffer.empty()) {
841     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
842     // If we found a real match, we are done.
843     if (!Prefix.empty()) {
844       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
845       return Prefix;
846     }
847 
848     // We didn't find any almost matches either, we are also done.
849     if (CheckLoc == StringRef::npos)
850       return StringRef();
851 
852     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
853 
854     // Advance to the last possible match we found and try again.
855     Buffer = Buffer.drop_front(CheckLoc + 1);
856   }
857 
858   return StringRef();
859 }
860 
861 /// ReadCheckFile - Read the check file, which specifies the sequence of
862 /// expected strings.  The strings are added to the CheckStrings vector.
863 /// Returns true in case of an error, false otherwise.
864 static bool ReadCheckFile(SourceMgr &SM,
865                           std::vector<CheckString> &CheckStrings) {
866   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
867       MemoryBuffer::getFileOrSTDIN(CheckFilename);
868   if (std::error_code EC = FileOrErr.getError()) {
869     errs() << "Could not open check file '" << CheckFilename
870            << "': " << EC.message() << '\n';
871     return true;
872   }
873 
874   // If we want to canonicalize whitespace, strip excess whitespace from the
875   // buffer containing the CHECK lines. Remove DOS style line endings.
876   std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
877       std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
878 
879   // Find all instances of CheckPrefix followed by : in the file.
880   StringRef Buffer = F->getBuffer();
881 
882   SM.AddNewSourceBuffer(std::move(F), SMLoc());
883 
884   std::vector<Pattern> ImplicitNegativeChecks;
885   for (const auto &PatternString : ImplicitCheckNot) {
886     // Create a buffer with fake command line content in order to display the
887     // command line option responsible for the specific implicit CHECK-NOT.
888     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
889     std::string Suffix = "'";
890     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
891         Prefix + PatternString + Suffix, "command line");
892 
893     StringRef PatternInBuffer =
894         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
895     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
896 
897     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
898     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
899                                                "IMPLICIT-CHECK", SM, 0);
900   }
901 
902 
903   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
904 
905   // LineNumber keeps track of the line on which CheckPrefix instances are
906   // found.
907   unsigned LineNumber = 1;
908 
909   while (1) {
910     Check::CheckType CheckTy;
911     size_t PrefixLoc;
912 
913     // See if a prefix occurs in the memory buffer.
914     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
915                                                    LineNumber,
916                                                    CheckTy,
917                                                    PrefixLoc);
918     if (UsedPrefix.empty())
919       break;
920 
921     Buffer = Buffer.drop_front(PrefixLoc);
922 
923     // Location to use for error messages.
924     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
925 
926     // PrefixLoc is to the start of the prefix. Skip to the end.
927     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
928 
929     // Complain about useful-looking but unsupported suffixes.
930     if (CheckTy == Check::CheckBadNot) {
931       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
932                       SourceMgr::DK_Error,
933                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
934       return true;
935     }
936 
937     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
938     // leading and trailing whitespace.
939     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
940 
941     // Scan ahead to the end of line.
942     size_t EOL = Buffer.find_first_of("\n\r");
943 
944     // Remember the location of the start of the pattern, for diagnostics.
945     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
946 
947     // Parse the pattern.
948     Pattern P(CheckTy);
949     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
950       return true;
951 
952     // Verify that CHECK-LABEL lines do not define or use variables
953     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
954       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
955                       SourceMgr::DK_Error,
956                       "found '" + UsedPrefix + "-LABEL:'"
957                       " with variable definition or use");
958       return true;
959     }
960 
961     Buffer = Buffer.substr(EOL);
962 
963     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
964     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
965         CheckStrings.empty()) {
966       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
967       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
968                       SourceMgr::DK_Error,
969                       "found '" + UsedPrefix + "-" + Type + "' without previous '"
970                       + UsedPrefix + ": line");
971       return true;
972     }
973 
974     // Handle CHECK-DAG/-NOT.
975     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
976       DagNotMatches.push_back(P);
977       continue;
978     }
979 
980     // Okay, add the string we captured to the output vector and move on.
981     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
982     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
983     DagNotMatches = ImplicitNegativeChecks;
984   }
985 
986   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
987   // prefix as a filler for the error message.
988   if (!DagNotMatches.empty()) {
989     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
990                               SMLoc::getFromPointer(Buffer.data()));
991     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
992   }
993 
994   if (CheckStrings.empty()) {
995     errs() << "error: no check strings found with prefix"
996            << (CheckPrefixes.size() > 1 ? "es " : " ");
997     prefix_iterator I = CheckPrefixes.begin();
998     prefix_iterator E = CheckPrefixes.end();
999     if (I != E) {
1000       errs() << "\'" << *I << ":'";
1001       ++I;
1002     }
1003     for (; I != E; ++I)
1004       errs() << ", \'" << *I << ":'";
1005 
1006     errs() << '\n';
1007     return true;
1008   }
1009 
1010   return false;
1011 }
1012 
1013 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
1014                              const Pattern &Pat, StringRef Buffer,
1015                              StringMap<StringRef> &VariableTable) {
1016   // Otherwise, we have an error, emit an error message.
1017   SM.PrintMessage(Loc, SourceMgr::DK_Error,
1018                   "expected string not found in input");
1019 
1020   // Print the "scanning from here" line.  If the current position is at the
1021   // end of a line, advance to the start of the next line.
1022   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1023 
1024   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1025                   "scanning from here");
1026 
1027   // Allow the pattern to print additional information if desired.
1028   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1029 }
1030 
1031 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1032                              StringRef Buffer,
1033                              StringMap<StringRef> &VariableTable) {
1034   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1035 }
1036 
1037 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1038 /// range.
1039 static unsigned CountNumNewlinesBetween(StringRef Range,
1040                                         const char *&FirstNewLine) {
1041   unsigned NumNewLines = 0;
1042   while (1) {
1043     // Scan for newline.
1044     Range = Range.substr(Range.find_first_of("\n\r"));
1045     if (Range.empty()) return NumNewLines;
1046 
1047     ++NumNewLines;
1048 
1049     // Handle \n\r and \r\n as a single newline.
1050     if (Range.size() > 1 &&
1051         (Range[1] == '\n' || Range[1] == '\r') &&
1052         (Range[0] != Range[1]))
1053       Range = Range.substr(1);
1054     Range = Range.substr(1);
1055 
1056     if (NumNewLines == 1)
1057       FirstNewLine = Range.begin();
1058   }
1059 }
1060 
1061 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1062                           bool IsLabelScanMode, size_t &MatchLen,
1063                           StringMap<StringRef> &VariableTable) const {
1064   size_t LastPos = 0;
1065   std::vector<const Pattern *> NotStrings;
1066 
1067   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1068   // bounds; we have not processed variable definitions within the bounded block
1069   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1070   // over the block again (including the last CHECK-LABEL) in normal mode.
1071   if (!IsLabelScanMode) {
1072     // Match "dag strings" (with mixed "not strings" if any).
1073     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1074     if (LastPos == StringRef::npos)
1075       return StringRef::npos;
1076   }
1077 
1078   // Match itself from the last position after matching CHECK-DAG.
1079   StringRef MatchBuffer = Buffer.substr(LastPos);
1080   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1081   if (MatchPos == StringRef::npos) {
1082     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1083     return StringRef::npos;
1084   }
1085 
1086   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1087   // or CHECK-NOT
1088   if (!IsLabelScanMode) {
1089     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1090 
1091     // If this check is a "CHECK-NEXT", verify that the previous match was on
1092     // the previous line (i.e. that there is one newline between them).
1093     if (CheckNext(SM, SkippedRegion))
1094       return StringRef::npos;
1095 
1096     // If this check is a "CHECK-SAME", verify that the previous match was on
1097     // the same line (i.e. that there is no newline between them).
1098     if (CheckSame(SM, SkippedRegion))
1099       return StringRef::npos;
1100 
1101     // If this match had "not strings", verify that they don't exist in the
1102     // skipped region.
1103     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1104       return StringRef::npos;
1105   }
1106 
1107   return LastPos + MatchPos;
1108 }
1109 
1110 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1111   if (Pat.getCheckTy() != Check::CheckNext)
1112     return false;
1113 
1114   // Count the number of newlines between the previous match and this one.
1115   assert(Buffer.data() !=
1116          SM.getMemoryBuffer(
1117            SM.FindBufferContainingLoc(
1118              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1119          "CHECK-NEXT can't be the first check in a file");
1120 
1121   const char *FirstNewLine = nullptr;
1122   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1123 
1124   if (NumNewLines == 0) {
1125     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1126                     "-NEXT: is on the same line as previous match");
1127     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1128                     SourceMgr::DK_Note, "'next' match was here");
1129     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1130                     "previous match ended here");
1131     return true;
1132   }
1133 
1134   if (NumNewLines != 1) {
1135     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1136                     "-NEXT: is not on the line after the previous match");
1137     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1138                     SourceMgr::DK_Note, "'next' match was here");
1139     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1140                     "previous match ended here");
1141     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1142                     "non-matching line after previous match is here");
1143     return true;
1144   }
1145 
1146   return false;
1147 }
1148 
1149 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1150   if (Pat.getCheckTy() != Check::CheckSame)
1151     return false;
1152 
1153   // Count the number of newlines between the previous match and this one.
1154   assert(Buffer.data() !=
1155              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1156                                     SMLoc::getFromPointer(Buffer.data())))
1157                  ->getBufferStart() &&
1158          "CHECK-SAME can't be the first check in a file");
1159 
1160   const char *FirstNewLine = nullptr;
1161   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1162 
1163   if (NumNewLines != 0) {
1164     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1165                     Prefix +
1166                         "-SAME: is not on the same line as the previous match");
1167     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1168                     "'next' match was here");
1169     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1170                     "previous match ended here");
1171     return true;
1172   }
1173 
1174   return false;
1175 }
1176 
1177 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1178                            const std::vector<const Pattern *> &NotStrings,
1179                            StringMap<StringRef> &VariableTable) const {
1180   for (const Pattern *Pat : NotStrings) {
1181     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1182 
1183     size_t MatchLen = 0;
1184     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1185 
1186     if (Pos == StringRef::npos) continue;
1187 
1188     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1189                     SourceMgr::DK_Error,
1190                     Prefix + "-NOT: string occurred!");
1191     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1192                     Prefix + "-NOT: pattern specified here");
1193     return true;
1194   }
1195 
1196   return false;
1197 }
1198 
1199 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1200                              std::vector<const Pattern *> &NotStrings,
1201                              StringMap<StringRef> &VariableTable) const {
1202   if (DagNotStrings.empty())
1203     return 0;
1204 
1205   size_t LastPos = 0;
1206   size_t StartPos = LastPos;
1207 
1208   for (const Pattern &Pat : DagNotStrings) {
1209     assert((Pat.getCheckTy() == Check::CheckDAG ||
1210             Pat.getCheckTy() == Check::CheckNot) &&
1211            "Invalid CHECK-DAG or CHECK-NOT!");
1212 
1213     if (Pat.getCheckTy() == Check::CheckNot) {
1214       NotStrings.push_back(&Pat);
1215       continue;
1216     }
1217 
1218     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1219 
1220     size_t MatchLen = 0, MatchPos;
1221 
1222     // CHECK-DAG always matches from the start.
1223     StringRef MatchBuffer = Buffer.substr(StartPos);
1224     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1225     // With a group of CHECK-DAGs, a single mismatching means the match on
1226     // that group of CHECK-DAGs fails immediately.
1227     if (MatchPos == StringRef::npos) {
1228       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1229       return StringRef::npos;
1230     }
1231     // Re-calc it as the offset relative to the start of the original string.
1232     MatchPos += StartPos;
1233 
1234     if (!NotStrings.empty()) {
1235       if (MatchPos < LastPos) {
1236         // Reordered?
1237         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1238                         SourceMgr::DK_Error,
1239                         Prefix + "-DAG: found a match of CHECK-DAG"
1240                         " reordering across a CHECK-NOT");
1241         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1242                         SourceMgr::DK_Note,
1243                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1244                         " is found here");
1245         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1246                         Prefix + "-NOT: the crossed pattern specified"
1247                         " here");
1248         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1249                         Prefix + "-DAG: the reordered pattern specified"
1250                         " here");
1251         return StringRef::npos;
1252       }
1253       // All subsequent CHECK-DAGs should be matched from the farthest
1254       // position of all precedent CHECK-DAGs (including this one.)
1255       StartPos = LastPos;
1256       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1257       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1258       // region.
1259       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1260       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1261         return StringRef::npos;
1262       // Clear "not strings".
1263       NotStrings.clear();
1264     }
1265 
1266     // Update the last position with CHECK-DAG matches.
1267     LastPos = std::max(MatchPos + MatchLen, LastPos);
1268   }
1269 
1270   return LastPos;
1271 }
1272 
1273 // A check prefix must contain only alphanumeric, hyphens and underscores.
1274 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1275   Regex Validator("^[a-zA-Z0-9_-]*$");
1276   return Validator.match(CheckPrefix);
1277 }
1278 
1279 static bool ValidateCheckPrefixes() {
1280   StringSet<> PrefixSet;
1281 
1282   for (StringRef Prefix : CheckPrefixes) {
1283     // Reject empty prefixes.
1284     if (Prefix == "")
1285       return false;
1286 
1287     if (!PrefixSet.insert(Prefix).second)
1288       return false;
1289 
1290     if (!ValidateCheckPrefix(Prefix))
1291       return false;
1292   }
1293 
1294   return true;
1295 }
1296 
1297 // I don't think there's a way to specify an initial value for cl::list,
1298 // so if nothing was specified, add the default
1299 static void AddCheckPrefixIfNeeded() {
1300   if (CheckPrefixes.empty())
1301     CheckPrefixes.push_back("CHECK");
1302 }
1303 
1304 static void DumpCommandLine(int argc, char **argv) {
1305   errs() << "FileCheck command line: ";
1306   for (int I = 0; I < argc; I++)
1307     errs() << " " << argv[I];
1308   errs() << "\n";
1309 }
1310 
1311 int main(int argc, char **argv) {
1312   sys::PrintStackTraceOnErrorSignal(argv[0]);
1313   PrettyStackTraceProgram X(argc, argv);
1314   cl::ParseCommandLineOptions(argc, argv);
1315 
1316   if (!ValidateCheckPrefixes()) {
1317     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1318               "start with a letter and contain only alphanumeric characters, "
1319               "hyphens and underscores\n";
1320     return 2;
1321   }
1322 
1323   AddCheckPrefixIfNeeded();
1324 
1325   SourceMgr SM;
1326 
1327   // Read the expected strings from the check file.
1328   std::vector<CheckString> CheckStrings;
1329   if (ReadCheckFile(SM, CheckStrings))
1330     return 2;
1331 
1332   // Open the file to check and add it to SourceMgr.
1333   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1334       MemoryBuffer::getFileOrSTDIN(InputFilename);
1335   if (std::error_code EC = FileOrErr.getError()) {
1336     errs() << "Could not open input file '" << InputFilename
1337            << "': " << EC.message() << '\n';
1338     return 2;
1339   }
1340   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1341 
1342   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1343     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1344     DumpCommandLine(argc, argv);
1345     return 2;
1346   }
1347 
1348   // Remove duplicate spaces in the input file if requested.
1349   // Remove DOS style line endings.
1350   std::unique_ptr<MemoryBuffer> F =
1351       CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1352 
1353   // Check that we have all of the expected strings, in order, in the input
1354   // file.
1355   StringRef Buffer = F->getBuffer();
1356 
1357   SM.AddNewSourceBuffer(std::move(F), SMLoc());
1358 
1359   /// VariableTable - This holds all the current filecheck variables.
1360   StringMap<StringRef> VariableTable;
1361 
1362   bool hasError = false;
1363 
1364   unsigned i = 0, j = 0, e = CheckStrings.size();
1365 
1366   while (true) {
1367     StringRef CheckRegion;
1368     if (j == e) {
1369       CheckRegion = Buffer;
1370     } else {
1371       const CheckString &CheckLabelStr = CheckStrings[j];
1372       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1373         ++j;
1374         continue;
1375       }
1376 
1377       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1378       size_t MatchLabelLen = 0;
1379       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1380                                                  MatchLabelLen, VariableTable);
1381       if (MatchLabelPos == StringRef::npos) {
1382         hasError = true;
1383         break;
1384       }
1385 
1386       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1387       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1388       ++j;
1389     }
1390 
1391     for ( ; i != j; ++i) {
1392       const CheckString &CheckStr = CheckStrings[i];
1393 
1394       // Check each string within the scanned region, including a second check
1395       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1396       size_t MatchLen = 0;
1397       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1398                                        VariableTable);
1399 
1400       if (MatchPos == StringRef::npos) {
1401         hasError = true;
1402         i = j;
1403         break;
1404       }
1405 
1406       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1407     }
1408 
1409     if (j == e)
1410       break;
1411   }
1412 
1413   return hasError ? 1 : 0;
1414 }
1415