xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 2ad6d48b0c631d7fa8416c07f8b47f167cfd2fd8)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43               cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 
49 static cl::opt<bool>
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51               cl::desc("Do not treat all horizontal whitespace as equivalent"));
52 
53 static cl::list<std::string> ImplicitCheckNot(
54     "implicit-check-not",
55     cl::desc("Add an implicit negative check with this pattern to every\n"
56              "positive check. This can be used to ensure that no instances of\n"
57              "this pattern occur which are not matched by a positive pattern"),
58     cl::value_desc("pattern"));
59 
60 static cl::opt<bool> AllowEmptyInput(
61     "allow-empty", cl::init(false),
62     cl::desc("Allow the input file to be empty. This is useful when making\n"
63              "checks that some error message does not occur, for example."));
64 
65 static cl::opt<bool> MatchFullLines(
66     "match-full-lines", cl::init(false),
67     cl::desc("Require all positive matches to cover an entire input line.\n"
68              "Allows leading and trailing whitespace if --strict-whitespace\n"
69              "is not also passed."));
70 
71 typedef cl::list<std::string>::const_iterator prefix_iterator;
72 
73 //===----------------------------------------------------------------------===//
74 // Pattern Handling Code.
75 //===----------------------------------------------------------------------===//
76 
77 namespace Check {
78   enum CheckType {
79     CheckNone = 0,
80     CheckPlain,
81     CheckNext,
82     CheckSame,
83     CheckNot,
84     CheckDAG,
85     CheckLabel,
86 
87     /// MatchEOF - When set, this pattern only matches the end of file. This is
88     /// used for trailing CHECK-NOTs.
89     CheckEOF,
90     /// CheckBadNot - Found -NOT combined with another CHECK suffix.
91     CheckBadNot
92   };
93 }
94 
95 class Pattern {
96   SMLoc PatternLoc;
97 
98   Check::CheckType CheckTy;
99 
100   /// FixedStr - If non-empty, this pattern is a fixed string match with the
101   /// specified fixed string.
102   StringRef FixedStr;
103 
104   /// RegEx - If non-empty, this is a regex pattern.
105   std::string RegExStr;
106 
107   /// \brief Contains the number of line this pattern is in.
108   unsigned LineNumber;
109 
110   /// VariableUses - Entries in this vector map to uses of a variable in the
111   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
112   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
113   /// value of bar at offset 3.
114   std::vector<std::pair<StringRef, unsigned> > VariableUses;
115 
116   /// VariableDefs - Maps definitions of variables to their parenthesized
117   /// capture numbers.
118   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
119   std::map<StringRef, unsigned> VariableDefs;
120 
121 public:
122 
123   Pattern(Check::CheckType Ty)
124     : CheckTy(Ty) { }
125 
126   /// getLoc - Return the location in source code.
127   SMLoc getLoc() const { return PatternLoc; }
128 
129   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
130   /// which prefix is being matched, SM provides the SourceMgr used for error
131   /// reports, and LineNumber is the line number in the input file from which
132   /// the pattern string was read.  Returns true in case of an error, false
133   /// otherwise.
134   bool ParsePattern(StringRef PatternStr,
135                     StringRef Prefix,
136                     SourceMgr &SM,
137                     unsigned LineNumber);
138 
139   /// Match - Match the pattern string against the input buffer Buffer.  This
140   /// returns the position that is matched or npos if there is no match.  If
141   /// there is a match, the size of the matched string is returned in MatchLen.
142   ///
143   /// The VariableTable StringMap provides the current values of filecheck
144   /// variables and is updated if this match defines new values.
145   size_t Match(StringRef Buffer, size_t &MatchLen,
146                StringMap<StringRef> &VariableTable) const;
147 
148   /// PrintFailureInfo - Print additional information about a failure to match
149   /// involving this pattern.
150   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
151                         const StringMap<StringRef> &VariableTable) const;
152 
153   bool hasVariable() const { return !(VariableUses.empty() &&
154                                       VariableDefs.empty()); }
155 
156   Check::CheckType getCheckTy() const { return CheckTy; }
157 
158 private:
159   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
160   void AddBackrefToRegEx(unsigned BackrefNum);
161 
162   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
163   /// matching this pattern at the start of \arg Buffer; a distance of zero
164   /// should correspond to a perfect match.
165   unsigned ComputeMatchDistance(StringRef Buffer,
166                                const StringMap<StringRef> &VariableTable) const;
167 
168   /// \brief Evaluates expression and stores the result to \p Value.
169   /// \return true on success. false when the expression has invalid syntax.
170   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
171 
172   /// \brief Finds the closing sequence of a regex variable usage or
173   /// definition. Str has to point in the beginning of the definition
174   /// (right after the opening sequence).
175   /// \return offset of the closing sequence within Str, or npos if it was not
176   /// found.
177   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
178 };
179 
180 
181 bool Pattern::ParsePattern(StringRef PatternStr,
182                            StringRef Prefix,
183                            SourceMgr &SM,
184                            unsigned LineNumber) {
185   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
186 
187   this->LineNumber = LineNumber;
188   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
189 
190   // Ignore trailing whitespace.
191   while (!PatternStr.empty() &&
192          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
193     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
194 
195   // Check that there is something on the line.
196   if (PatternStr.empty()) {
197     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
198                     "found empty check string with prefix '" +
199                     Prefix + ":'");
200     return true;
201   }
202 
203   // Check to see if this is a fixed string, or if it has regex pieces.
204   if (!MatchFullLinesHere &&
205       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
206                                  PatternStr.find("[[") == StringRef::npos))) {
207     FixedStr = PatternStr;
208     return false;
209   }
210 
211   if (MatchFullLinesHere) {
212     RegExStr += '^';
213     if (!NoCanonicalizeWhiteSpace)
214       RegExStr += " *";
215   }
216 
217   // Paren value #0 is for the fully matched string.  Any new parenthesized
218   // values add from there.
219   unsigned CurParen = 1;
220 
221   // Otherwise, there is at least one regex piece.  Build up the regex pattern
222   // by escaping scary characters in fixed strings, building up one big regex.
223   while (!PatternStr.empty()) {
224     // RegEx matches.
225     if (PatternStr.startswith("{{")) {
226       // This is the start of a regex match.  Scan for the }}.
227       size_t End = PatternStr.find("}}");
228       if (End == StringRef::npos) {
229         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
230                         SourceMgr::DK_Error,
231                         "found start of regex string with no end '}}'");
232         return true;
233       }
234 
235       // Enclose {{}} patterns in parens just like [[]] even though we're not
236       // capturing the result for any purpose.  This is required in case the
237       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
238       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
239       RegExStr += '(';
240       ++CurParen;
241 
242       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
243         return true;
244       RegExStr += ')';
245 
246       PatternStr = PatternStr.substr(End+2);
247       continue;
248     }
249 
250     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
251     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
252     // second form is [[foo]] which is a reference to foo.  The variable name
253     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
254     // it.  This is to catch some common errors.
255     if (PatternStr.startswith("[[")) {
256       // Find the closing bracket pair ending the match.  End is going to be an
257       // offset relative to the beginning of the match string.
258       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
259 
260       if (End == StringRef::npos) {
261         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
262                         SourceMgr::DK_Error,
263                         "invalid named regex reference, no ]] found");
264         return true;
265       }
266 
267       StringRef MatchStr = PatternStr.substr(2, End);
268       PatternStr = PatternStr.substr(End+4);
269 
270       // Get the regex name (e.g. "foo").
271       size_t NameEnd = MatchStr.find(':');
272       StringRef Name = MatchStr.substr(0, NameEnd);
273 
274       if (Name.empty()) {
275         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
276                         "invalid name in named regex: empty name");
277         return true;
278       }
279 
280       // Verify that the name/expression is well formed. FileCheck currently
281       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
282       // is relaxed, more strict check is performed in \c EvaluateExpression.
283       bool IsExpression = false;
284       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
285         if (i == 0 && Name[i] == '@') {
286           if (NameEnd != StringRef::npos) {
287             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
288                             SourceMgr::DK_Error,
289                             "invalid name in named regex definition");
290             return true;
291           }
292           IsExpression = true;
293           continue;
294         }
295         if (Name[i] != '_' && !isalnum(Name[i]) &&
296             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
297           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
298                           SourceMgr::DK_Error, "invalid name in named regex");
299           return true;
300         }
301       }
302 
303       // Name can't start with a digit.
304       if (isdigit(static_cast<unsigned char>(Name[0]))) {
305         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
306                         "invalid name in named regex");
307         return true;
308       }
309 
310       // Handle [[foo]].
311       if (NameEnd == StringRef::npos) {
312         // Handle variables that were defined earlier on the same line by
313         // emitting a backreference.
314         if (VariableDefs.find(Name) != VariableDefs.end()) {
315           unsigned VarParenNum = VariableDefs[Name];
316           if (VarParenNum < 1 || VarParenNum > 9) {
317             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
318                             SourceMgr::DK_Error,
319                             "Can't back-reference more than 9 variables");
320             return true;
321           }
322           AddBackrefToRegEx(VarParenNum);
323         } else {
324           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
325         }
326         continue;
327       }
328 
329       // Handle [[foo:.*]].
330       VariableDefs[Name] = CurParen;
331       RegExStr += '(';
332       ++CurParen;
333 
334       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
335         return true;
336 
337       RegExStr += ')';
338     }
339 
340     // Handle fixed string matches.
341     // Find the end, which is the start of the next regex.
342     size_t FixedMatchEnd = PatternStr.find("{{");
343     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
344     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
345     PatternStr = PatternStr.substr(FixedMatchEnd);
346   }
347 
348   if (MatchFullLinesHere) {
349     if (!NoCanonicalizeWhiteSpace)
350       RegExStr += " *";
351     RegExStr += '$';
352   }
353 
354   return false;
355 }
356 
357 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
358                               SourceMgr &SM) {
359   Regex R(RS);
360   std::string Error;
361   if (!R.isValid(Error)) {
362     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
363                     "invalid regex: " + Error);
364     return true;
365   }
366 
367   RegExStr += RS.str();
368   CurParen += R.getNumMatches();
369   return false;
370 }
371 
372 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
373   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
374   std::string Backref = std::string("\\") +
375                         std::string(1, '0' + BackrefNum);
376   RegExStr += Backref;
377 }
378 
379 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
380   // The only supported expression is @LINE([\+-]\d+)?
381   if (!Expr.startswith("@LINE"))
382     return false;
383   Expr = Expr.substr(StringRef("@LINE").size());
384   int Offset = 0;
385   if (!Expr.empty()) {
386     if (Expr[0] == '+')
387       Expr = Expr.substr(1);
388     else if (Expr[0] != '-')
389       return false;
390     if (Expr.getAsInteger(10, Offset))
391       return false;
392   }
393   Value = llvm::itostr(LineNumber + Offset);
394   return true;
395 }
396 
397 /// Match - Match the pattern string against the input buffer Buffer.  This
398 /// returns the position that is matched or npos if there is no match.  If
399 /// there is a match, the size of the matched string is returned in MatchLen.
400 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
401                       StringMap<StringRef> &VariableTable) const {
402   // If this is the EOF pattern, match it immediately.
403   if (CheckTy == Check::CheckEOF) {
404     MatchLen = 0;
405     return Buffer.size();
406   }
407 
408   // If this is a fixed string pattern, just match it now.
409   if (!FixedStr.empty()) {
410     MatchLen = FixedStr.size();
411     return Buffer.find(FixedStr);
412   }
413 
414   // Regex match.
415 
416   // If there are variable uses, we need to create a temporary string with the
417   // actual value.
418   StringRef RegExToMatch = RegExStr;
419   std::string TmpStr;
420   if (!VariableUses.empty()) {
421     TmpStr = RegExStr;
422 
423     unsigned InsertOffset = 0;
424     for (const auto &VariableUse : VariableUses) {
425       std::string Value;
426 
427       if (VariableUse.first[0] == '@') {
428         if (!EvaluateExpression(VariableUse.first, Value))
429           return StringRef::npos;
430       } else {
431         StringMap<StringRef>::iterator it =
432             VariableTable.find(VariableUse.first);
433         // If the variable is undefined, return an error.
434         if (it == VariableTable.end())
435           return StringRef::npos;
436 
437         // Look up the value and escape it so that we can put it into the regex.
438         Value += Regex::escape(it->second);
439       }
440 
441       // Plop it into the regex at the adjusted offset.
442       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
443                     Value.begin(), Value.end());
444       InsertOffset += Value.size();
445     }
446 
447     // Match the newly constructed regex.
448     RegExToMatch = TmpStr;
449   }
450 
451 
452   SmallVector<StringRef, 4> MatchInfo;
453   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
454     return StringRef::npos;
455 
456   // Successful regex match.
457   assert(!MatchInfo.empty() && "Didn't get any match");
458   StringRef FullMatch = MatchInfo[0];
459 
460   // If this defines any variables, remember their values.
461   for (const auto &VariableDef : VariableDefs) {
462     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
463     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
464   }
465 
466   MatchLen = FullMatch.size();
467   return FullMatch.data()-Buffer.data();
468 }
469 
470 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
471                               const StringMap<StringRef> &VariableTable) const {
472   // Just compute the number of matching characters. For regular expressions, we
473   // just compare against the regex itself and hope for the best.
474   //
475   // FIXME: One easy improvement here is have the regex lib generate a single
476   // example regular expression which matches, and use that as the example
477   // string.
478   StringRef ExampleString(FixedStr);
479   if (ExampleString.empty())
480     ExampleString = RegExStr;
481 
482   // Only compare up to the first line in the buffer, or the string size.
483   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
484   BufferPrefix = BufferPrefix.split('\n').first;
485   return BufferPrefix.edit_distance(ExampleString);
486 }
487 
488 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
489                                const StringMap<StringRef> &VariableTable) const{
490   // If this was a regular expression using variables, print the current
491   // variable values.
492   if (!VariableUses.empty()) {
493     for (const auto &VariableUse : VariableUses) {
494       SmallString<256> Msg;
495       raw_svector_ostream OS(Msg);
496       StringRef Var = VariableUse.first;
497       if (Var[0] == '@') {
498         std::string Value;
499         if (EvaluateExpression(Var, Value)) {
500           OS << "with expression \"";
501           OS.write_escaped(Var) << "\" equal to \"";
502           OS.write_escaped(Value) << "\"";
503         } else {
504           OS << "uses incorrect expression \"";
505           OS.write_escaped(Var) << "\"";
506         }
507       } else {
508         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
509 
510         // Check for undefined variable references.
511         if (it == VariableTable.end()) {
512           OS << "uses undefined variable \"";
513           OS.write_escaped(Var) << "\"";
514         } else {
515           OS << "with variable \"";
516           OS.write_escaped(Var) << "\" equal to \"";
517           OS.write_escaped(it->second) << "\"";
518         }
519       }
520 
521       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
522                       OS.str());
523     }
524   }
525 
526   // Attempt to find the closest/best fuzzy match.  Usually an error happens
527   // because some string in the output didn't exactly match. In these cases, we
528   // would like to show the user a best guess at what "should have" matched, to
529   // save them having to actually check the input manually.
530   size_t NumLinesForward = 0;
531   size_t Best = StringRef::npos;
532   double BestQuality = 0;
533 
534   // Use an arbitrary 4k limit on how far we will search.
535   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
536     if (Buffer[i] == '\n')
537       ++NumLinesForward;
538 
539     // Patterns have leading whitespace stripped, so skip whitespace when
540     // looking for something which looks like a pattern.
541     if (Buffer[i] == ' ' || Buffer[i] == '\t')
542       continue;
543 
544     // Compute the "quality" of this match as an arbitrary combination of the
545     // match distance and the number of lines skipped to get to this match.
546     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
547     double Quality = Distance + (NumLinesForward / 100.);
548 
549     if (Quality < BestQuality || Best == StringRef::npos) {
550       Best = i;
551       BestQuality = Quality;
552     }
553   }
554 
555   // Print the "possible intended match here" line if we found something
556   // reasonable and not equal to what we showed in the "scanning from here"
557   // line.
558   if (Best && Best != StringRef::npos && BestQuality < 50) {
559       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
560                       SourceMgr::DK_Note, "possible intended match here");
561 
562     // FIXME: If we wanted to be really friendly we would show why the match
563     // failed, as it can be hard to spot simple one character differences.
564   }
565 }
566 
567 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
568   // Offset keeps track of the current offset within the input Str
569   size_t Offset = 0;
570   // [...] Nesting depth
571   size_t BracketDepth = 0;
572 
573   while (!Str.empty()) {
574     if (Str.startswith("]]") && BracketDepth == 0)
575       return Offset;
576     if (Str[0] == '\\') {
577       // Backslash escapes the next char within regexes, so skip them both.
578       Str = Str.substr(2);
579       Offset += 2;
580     } else {
581       switch (Str[0]) {
582         default:
583           break;
584         case '[':
585           BracketDepth++;
586           break;
587         case ']':
588           if (BracketDepth == 0) {
589             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
590                             SourceMgr::DK_Error,
591                             "missing closing \"]\" for regex variable");
592             exit(1);
593           }
594           BracketDepth--;
595           break;
596       }
597       Str = Str.substr(1);
598       Offset++;
599     }
600   }
601 
602   return StringRef::npos;
603 }
604 
605 
606 //===----------------------------------------------------------------------===//
607 // Check Strings.
608 //===----------------------------------------------------------------------===//
609 
610 /// CheckString - This is a check that we found in the input file.
611 struct CheckString {
612   /// Pat - The pattern to match.
613   Pattern Pat;
614 
615   /// Prefix - Which prefix name this check matched.
616   StringRef Prefix;
617 
618   /// Loc - The location in the match file that the check string was specified.
619   SMLoc Loc;
620 
621   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
622   /// as opposed to a CHECK: directive.
623   //  Check::CheckType CheckTy;
624 
625   /// DagNotStrings - These are all of the strings that are disallowed from
626   /// occurring between this match string and the previous one (or start of
627   /// file).
628   std::vector<Pattern> DagNotStrings;
629 
630   CheckString(const Pattern &P, StringRef S, SMLoc L)
631       : Pat(P), Prefix(S), Loc(L) {}
632 
633   /// Check - Match check string and its "not strings" and/or "dag strings".
634   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
635                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
636 
637   /// CheckNext - Verify there is a single line in the given buffer.
638   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
639 
640   /// CheckSame - Verify there is no newline in the given buffer.
641   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
642 
643   /// CheckNot - Verify there's no "not strings" in the given buffer.
644   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
645                 const std::vector<const Pattern *> &NotStrings,
646                 StringMap<StringRef> &VariableTable) const;
647 
648   /// CheckDag - Match "dag strings" and their mixed "not strings".
649   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
650                   std::vector<const Pattern *> &NotStrings,
651                   StringMap<StringRef> &VariableTable) const;
652 };
653 
654 /// Canonicalize whitespaces in the input file. Line endings are replaced
655 /// with UNIX-style '\n'.
656 ///
657 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
658 /// characters to a single space.
659 static std::unique_ptr<MemoryBuffer>
660 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
661                       bool PreserveHorizontal) {
662   SmallString<128> NewFile;
663   NewFile.reserve(MB->getBufferSize());
664 
665   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
666        Ptr != End; ++Ptr) {
667     // Eliminate trailing dosish \r.
668     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
669       continue;
670     }
671 
672     // If current char is not a horizontal whitespace or if horizontal
673     // whitespace canonicalization is disabled, dump it to output as is.
674     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
675       NewFile.push_back(*Ptr);
676       continue;
677     }
678 
679     // Otherwise, add one space and advance over neighboring space.
680     NewFile.push_back(' ');
681     while (Ptr+1 != End &&
682            (Ptr[1] == ' ' || Ptr[1] == '\t'))
683       ++Ptr;
684   }
685 
686   return std::unique_ptr<MemoryBuffer>(
687       MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
688 }
689 
690 static bool IsPartOfWord(char c) {
691   return (isalnum(c) || c == '-' || c == '_');
692 }
693 
694 // Get the size of the prefix extension.
695 static size_t CheckTypeSize(Check::CheckType Ty) {
696   switch (Ty) {
697   case Check::CheckNone:
698   case Check::CheckBadNot:
699     return 0;
700 
701   case Check::CheckPlain:
702     return sizeof(":") - 1;
703 
704   case Check::CheckNext:
705     return sizeof("-NEXT:") - 1;
706 
707   case Check::CheckSame:
708     return sizeof("-SAME:") - 1;
709 
710   case Check::CheckNot:
711     return sizeof("-NOT:") - 1;
712 
713   case Check::CheckDAG:
714     return sizeof("-DAG:") - 1;
715 
716   case Check::CheckLabel:
717     return sizeof("-LABEL:") - 1;
718 
719   case Check::CheckEOF:
720     llvm_unreachable("Should not be using EOF size");
721   }
722 
723   llvm_unreachable("Bad check type");
724 }
725 
726 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
727   char NextChar = Buffer[Prefix.size()];
728 
729   // Verify that the : is present after the prefix.
730   if (NextChar == ':')
731     return Check::CheckPlain;
732 
733   if (NextChar != '-')
734     return Check::CheckNone;
735 
736   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
737   if (Rest.startswith("NEXT:"))
738     return Check::CheckNext;
739 
740   if (Rest.startswith("SAME:"))
741     return Check::CheckSame;
742 
743   if (Rest.startswith("NOT:"))
744     return Check::CheckNot;
745 
746   if (Rest.startswith("DAG:"))
747     return Check::CheckDAG;
748 
749   if (Rest.startswith("LABEL:"))
750     return Check::CheckLabel;
751 
752   // You can't combine -NOT with another suffix.
753   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
754       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
755       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
756     return Check::CheckBadNot;
757 
758   return Check::CheckNone;
759 }
760 
761 // From the given position, find the next character after the word.
762 static size_t SkipWord(StringRef Str, size_t Loc) {
763   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
764     ++Loc;
765   return Loc;
766 }
767 
768 // Try to find the first match in buffer for any prefix. If a valid match is
769 // found, return that prefix and set its type and location.  If there are almost
770 // matches (e.g. the actual prefix string is found, but is not an actual check
771 // string), but no valid match, return an empty string and set the position to
772 // resume searching from. If no partial matches are found, return an empty
773 // string and the location will be StringRef::npos. If one prefix is a substring
774 // of another, the maximal match should be found. e.g. if "A" and "AA" are
775 // prefixes then AA-CHECK: should match the second one.
776 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
777                                          Check::CheckType &CheckTy,
778                                          size_t &CheckLoc) {
779   StringRef FirstPrefix;
780   size_t FirstLoc = StringRef::npos;
781   size_t SearchLoc = StringRef::npos;
782   Check::CheckType FirstTy = Check::CheckNone;
783 
784   CheckTy = Check::CheckNone;
785   CheckLoc = StringRef::npos;
786 
787   for (StringRef Prefix : CheckPrefixes) {
788     size_t PrefixLoc = Buffer.find(Prefix);
789 
790     if (PrefixLoc == StringRef::npos)
791       continue;
792 
793     // Track where we are searching for invalid prefixes that look almost right.
794     // We need to only advance to the first partial match on the next attempt
795     // since a partial match could be a substring of a later, valid prefix.
796     // Need to skip to the end of the word, otherwise we could end up
797     // matching a prefix in a substring later.
798     if (PrefixLoc < SearchLoc)
799       SearchLoc = SkipWord(Buffer, PrefixLoc);
800 
801     // We only want to find the first match to avoid skipping some.
802     if (PrefixLoc > FirstLoc)
803       continue;
804     // If one matching check-prefix is a prefix of another, choose the
805     // longer one.
806     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
807       continue;
808 
809     StringRef Rest = Buffer.drop_front(PrefixLoc);
810     // Make sure we have actually found the prefix, and not a word containing
811     // it. This should also prevent matching the wrong prefix when one is a
812     // substring of another.
813     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
814       FirstTy = Check::CheckNone;
815     else
816       FirstTy = FindCheckType(Rest, Prefix);
817 
818     FirstLoc = PrefixLoc;
819     FirstPrefix = Prefix;
820   }
821 
822   // If the first prefix is invalid, we should continue the search after it.
823   if (FirstTy == Check::CheckNone) {
824     CheckLoc = SearchLoc;
825     return "";
826   }
827 
828   CheckTy = FirstTy;
829   CheckLoc = FirstLoc;
830   return FirstPrefix;
831 }
832 
833 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
834                                          unsigned &LineNumber,
835                                          Check::CheckType &CheckTy,
836                                          size_t &CheckLoc) {
837   while (!Buffer.empty()) {
838     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
839     // If we found a real match, we are done.
840     if (!Prefix.empty()) {
841       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
842       return Prefix;
843     }
844 
845     // We didn't find any almost matches either, we are also done.
846     if (CheckLoc == StringRef::npos)
847       return StringRef();
848 
849     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
850 
851     // Advance to the last possible match we found and try again.
852     Buffer = Buffer.drop_front(CheckLoc + 1);
853   }
854 
855   return StringRef();
856 }
857 
858 /// ReadCheckFile - Read the check file, which specifies the sequence of
859 /// expected strings.  The strings are added to the CheckStrings vector.
860 /// Returns true in case of an error, false otherwise.
861 static bool ReadCheckFile(SourceMgr &SM,
862                           std::vector<CheckString> &CheckStrings) {
863   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
864       MemoryBuffer::getFileOrSTDIN(CheckFilename);
865   if (std::error_code EC = FileOrErr.getError()) {
866     errs() << "Could not open check file '" << CheckFilename
867            << "': " << EC.message() << '\n';
868     return true;
869   }
870 
871   // If we want to canonicalize whitespace, strip excess whitespace from the
872   // buffer containing the CHECK lines. Remove DOS style line endings.
873   std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
874       std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
875 
876   // Find all instances of CheckPrefix followed by : in the file.
877   StringRef Buffer = F->getBuffer();
878 
879   SM.AddNewSourceBuffer(std::move(F), SMLoc());
880 
881   std::vector<Pattern> ImplicitNegativeChecks;
882   for (const auto &PatternString : ImplicitCheckNot) {
883     // Create a buffer with fake command line content in order to display the
884     // command line option responsible for the specific implicit CHECK-NOT.
885     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
886     std::string Suffix = "'";
887     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
888         Prefix + PatternString + Suffix, "command line");
889 
890     StringRef PatternInBuffer =
891         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
892     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
893 
894     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
895     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
896                                                "IMPLICIT-CHECK", SM, 0);
897   }
898 
899 
900   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
901 
902   // LineNumber keeps track of the line on which CheckPrefix instances are
903   // found.
904   unsigned LineNumber = 1;
905 
906   while (1) {
907     Check::CheckType CheckTy;
908     size_t PrefixLoc;
909 
910     // See if a prefix occurs in the memory buffer.
911     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
912                                                    LineNumber,
913                                                    CheckTy,
914                                                    PrefixLoc);
915     if (UsedPrefix.empty())
916       break;
917 
918     Buffer = Buffer.drop_front(PrefixLoc);
919 
920     // Location to use for error messages.
921     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
922 
923     // PrefixLoc is to the start of the prefix. Skip to the end.
924     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
925 
926     // Complain about useful-looking but unsupported suffixes.
927     if (CheckTy == Check::CheckBadNot) {
928       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
929                       SourceMgr::DK_Error,
930                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
931       return true;
932     }
933 
934     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
935     // leading and trailing whitespace.
936     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
937 
938     // Scan ahead to the end of line.
939     size_t EOL = Buffer.find_first_of("\n\r");
940 
941     // Remember the location of the start of the pattern, for diagnostics.
942     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
943 
944     // Parse the pattern.
945     Pattern P(CheckTy);
946     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
947       return true;
948 
949     // Verify that CHECK-LABEL lines do not define or use variables
950     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
951       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
952                       SourceMgr::DK_Error,
953                       "found '" + UsedPrefix + "-LABEL:'"
954                       " with variable definition or use");
955       return true;
956     }
957 
958     Buffer = Buffer.substr(EOL);
959 
960     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
961     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
962         CheckStrings.empty()) {
963       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
964       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
965                       SourceMgr::DK_Error,
966                       "found '" + UsedPrefix + "-" + Type + "' without previous '"
967                       + UsedPrefix + ": line");
968       return true;
969     }
970 
971     // Handle CHECK-DAG/-NOT.
972     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
973       DagNotMatches.push_back(P);
974       continue;
975     }
976 
977     // Okay, add the string we captured to the output vector and move on.
978     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
979     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
980     DagNotMatches = ImplicitNegativeChecks;
981   }
982 
983   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
984   // prefix as a filler for the error message.
985   if (!DagNotMatches.empty()) {
986     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
987                               SMLoc::getFromPointer(Buffer.data()));
988     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
989   }
990 
991   if (CheckStrings.empty()) {
992     errs() << "error: no check strings found with prefix"
993            << (CheckPrefixes.size() > 1 ? "es " : " ");
994     prefix_iterator I = CheckPrefixes.begin();
995     prefix_iterator E = CheckPrefixes.end();
996     if (I != E) {
997       errs() << "\'" << *I << ":'";
998       ++I;
999     }
1000     for (; I != E; ++I)
1001       errs() << ", \'" << *I << ":'";
1002 
1003     errs() << '\n';
1004     return true;
1005   }
1006 
1007   return false;
1008 }
1009 
1010 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
1011                              const Pattern &Pat, StringRef Buffer,
1012                              StringMap<StringRef> &VariableTable) {
1013   // Otherwise, we have an error, emit an error message.
1014   SM.PrintMessage(Loc, SourceMgr::DK_Error,
1015                   "expected string not found in input");
1016 
1017   // Print the "scanning from here" line.  If the current position is at the
1018   // end of a line, advance to the start of the next line.
1019   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1020 
1021   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1022                   "scanning from here");
1023 
1024   // Allow the pattern to print additional information if desired.
1025   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1026 }
1027 
1028 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1029                              StringRef Buffer,
1030                              StringMap<StringRef> &VariableTable) {
1031   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1032 }
1033 
1034 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1035 /// range.
1036 static unsigned CountNumNewlinesBetween(StringRef Range,
1037                                         const char *&FirstNewLine) {
1038   unsigned NumNewLines = 0;
1039   while (1) {
1040     // Scan for newline.
1041     Range = Range.substr(Range.find_first_of("\n\r"));
1042     if (Range.empty()) return NumNewLines;
1043 
1044     ++NumNewLines;
1045 
1046     // Handle \n\r and \r\n as a single newline.
1047     if (Range.size() > 1 &&
1048         (Range[1] == '\n' || Range[1] == '\r') &&
1049         (Range[0] != Range[1]))
1050       Range = Range.substr(1);
1051     Range = Range.substr(1);
1052 
1053     if (NumNewLines == 1)
1054       FirstNewLine = Range.begin();
1055   }
1056 }
1057 
1058 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1059                           bool IsLabelScanMode, size_t &MatchLen,
1060                           StringMap<StringRef> &VariableTable) const {
1061   size_t LastPos = 0;
1062   std::vector<const Pattern *> NotStrings;
1063 
1064   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1065   // bounds; we have not processed variable definitions within the bounded block
1066   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1067   // over the block again (including the last CHECK-LABEL) in normal mode.
1068   if (!IsLabelScanMode) {
1069     // Match "dag strings" (with mixed "not strings" if any).
1070     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1071     if (LastPos == StringRef::npos)
1072       return StringRef::npos;
1073   }
1074 
1075   // Match itself from the last position after matching CHECK-DAG.
1076   StringRef MatchBuffer = Buffer.substr(LastPos);
1077   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1078   if (MatchPos == StringRef::npos) {
1079     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1080     return StringRef::npos;
1081   }
1082 
1083   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1084   // or CHECK-NOT
1085   if (!IsLabelScanMode) {
1086     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1087 
1088     // If this check is a "CHECK-NEXT", verify that the previous match was on
1089     // the previous line (i.e. that there is one newline between them).
1090     if (CheckNext(SM, SkippedRegion))
1091       return StringRef::npos;
1092 
1093     // If this check is a "CHECK-SAME", verify that the previous match was on
1094     // the same line (i.e. that there is no newline between them).
1095     if (CheckSame(SM, SkippedRegion))
1096       return StringRef::npos;
1097 
1098     // If this match had "not strings", verify that they don't exist in the
1099     // skipped region.
1100     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1101       return StringRef::npos;
1102   }
1103 
1104   return LastPos + MatchPos;
1105 }
1106 
1107 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1108   if (Pat.getCheckTy() != Check::CheckNext)
1109     return false;
1110 
1111   // Count the number of newlines between the previous match and this one.
1112   assert(Buffer.data() !=
1113          SM.getMemoryBuffer(
1114            SM.FindBufferContainingLoc(
1115              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1116          "CHECK-NEXT can't be the first check in a file");
1117 
1118   const char *FirstNewLine = nullptr;
1119   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1120 
1121   if (NumNewLines == 0) {
1122     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1123                     "-NEXT: is on the same line as previous match");
1124     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1125                     SourceMgr::DK_Note, "'next' match was here");
1126     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1127                     "previous match ended here");
1128     return true;
1129   }
1130 
1131   if (NumNewLines != 1) {
1132     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1133                     "-NEXT: is not on the line after the previous match");
1134     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1135                     SourceMgr::DK_Note, "'next' match was here");
1136     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1137                     "previous match ended here");
1138     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1139                     "non-matching line after previous match is here");
1140     return true;
1141   }
1142 
1143   return false;
1144 }
1145 
1146 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1147   if (Pat.getCheckTy() != Check::CheckSame)
1148     return false;
1149 
1150   // Count the number of newlines between the previous match and this one.
1151   assert(Buffer.data() !=
1152              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1153                                     SMLoc::getFromPointer(Buffer.data())))
1154                  ->getBufferStart() &&
1155          "CHECK-SAME can't be the first check in a file");
1156 
1157   const char *FirstNewLine = nullptr;
1158   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1159 
1160   if (NumNewLines != 0) {
1161     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1162                     Prefix +
1163                         "-SAME: is not on the same line as the previous match");
1164     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1165                     "'next' match was here");
1166     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1167                     "previous match ended here");
1168     return true;
1169   }
1170 
1171   return false;
1172 }
1173 
1174 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1175                            const std::vector<const Pattern *> &NotStrings,
1176                            StringMap<StringRef> &VariableTable) const {
1177   for (const Pattern *Pat : NotStrings) {
1178     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1179 
1180     size_t MatchLen = 0;
1181     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1182 
1183     if (Pos == StringRef::npos) continue;
1184 
1185     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1186                     SourceMgr::DK_Error,
1187                     Prefix + "-NOT: string occurred!");
1188     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1189                     Prefix + "-NOT: pattern specified here");
1190     return true;
1191   }
1192 
1193   return false;
1194 }
1195 
1196 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1197                              std::vector<const Pattern *> &NotStrings,
1198                              StringMap<StringRef> &VariableTable) const {
1199   if (DagNotStrings.empty())
1200     return 0;
1201 
1202   size_t LastPos = 0;
1203   size_t StartPos = LastPos;
1204 
1205   for (const Pattern &Pat : DagNotStrings) {
1206     assert((Pat.getCheckTy() == Check::CheckDAG ||
1207             Pat.getCheckTy() == Check::CheckNot) &&
1208            "Invalid CHECK-DAG or CHECK-NOT!");
1209 
1210     if (Pat.getCheckTy() == Check::CheckNot) {
1211       NotStrings.push_back(&Pat);
1212       continue;
1213     }
1214 
1215     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1216 
1217     size_t MatchLen = 0, MatchPos;
1218 
1219     // CHECK-DAG always matches from the start.
1220     StringRef MatchBuffer = Buffer.substr(StartPos);
1221     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1222     // With a group of CHECK-DAGs, a single mismatching means the match on
1223     // that group of CHECK-DAGs fails immediately.
1224     if (MatchPos == StringRef::npos) {
1225       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1226       return StringRef::npos;
1227     }
1228     // Re-calc it as the offset relative to the start of the original string.
1229     MatchPos += StartPos;
1230 
1231     if (!NotStrings.empty()) {
1232       if (MatchPos < LastPos) {
1233         // Reordered?
1234         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1235                         SourceMgr::DK_Error,
1236                         Prefix + "-DAG: found a match of CHECK-DAG"
1237                         " reordering across a CHECK-NOT");
1238         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1239                         SourceMgr::DK_Note,
1240                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1241                         " is found here");
1242         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1243                         Prefix + "-NOT: the crossed pattern specified"
1244                         " here");
1245         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1246                         Prefix + "-DAG: the reordered pattern specified"
1247                         " here");
1248         return StringRef::npos;
1249       }
1250       // All subsequent CHECK-DAGs should be matched from the farthest
1251       // position of all precedent CHECK-DAGs (including this one.)
1252       StartPos = LastPos;
1253       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1254       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1255       // region.
1256       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1257       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1258         return StringRef::npos;
1259       // Clear "not strings".
1260       NotStrings.clear();
1261     }
1262 
1263     // Update the last position with CHECK-DAG matches.
1264     LastPos = std::max(MatchPos + MatchLen, LastPos);
1265   }
1266 
1267   return LastPos;
1268 }
1269 
1270 // A check prefix must contain only alphanumeric, hyphens and underscores.
1271 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1272   Regex Validator("^[a-zA-Z0-9_-]*$");
1273   return Validator.match(CheckPrefix);
1274 }
1275 
1276 static bool ValidateCheckPrefixes() {
1277   StringSet<> PrefixSet;
1278 
1279   for (StringRef Prefix : CheckPrefixes) {
1280     // Reject empty prefixes.
1281     if (Prefix == "")
1282       return false;
1283 
1284     if (!PrefixSet.insert(Prefix).second)
1285       return false;
1286 
1287     if (!ValidateCheckPrefix(Prefix))
1288       return false;
1289   }
1290 
1291   return true;
1292 }
1293 
1294 // I don't think there's a way to specify an initial value for cl::list,
1295 // so if nothing was specified, add the default
1296 static void AddCheckPrefixIfNeeded() {
1297   if (CheckPrefixes.empty())
1298     CheckPrefixes.push_back("CHECK");
1299 }
1300 
1301 static void DumpCommandLine(int argc, char **argv) {
1302   errs() << "FileCheck command line: ";
1303   for (int I = 0; I < argc; I++)
1304     errs() << " " << argv[I];
1305   errs() << "\n";
1306 }
1307 
1308 int main(int argc, char **argv) {
1309   sys::PrintStackTraceOnErrorSignal(argv[0]);
1310   PrettyStackTraceProgram X(argc, argv);
1311   cl::ParseCommandLineOptions(argc, argv);
1312 
1313   if (!ValidateCheckPrefixes()) {
1314     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1315               "start with a letter and contain only alphanumeric characters, "
1316               "hyphens and underscores\n";
1317     return 2;
1318   }
1319 
1320   AddCheckPrefixIfNeeded();
1321 
1322   SourceMgr SM;
1323 
1324   // Read the expected strings from the check file.
1325   std::vector<CheckString> CheckStrings;
1326   if (ReadCheckFile(SM, CheckStrings))
1327     return 2;
1328 
1329   // Open the file to check and add it to SourceMgr.
1330   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1331       MemoryBuffer::getFileOrSTDIN(InputFilename);
1332   if (std::error_code EC = FileOrErr.getError()) {
1333     errs() << "Could not open input file '" << InputFilename
1334            << "': " << EC.message() << '\n';
1335     return 2;
1336   }
1337   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1338 
1339   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1340     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1341     DumpCommandLine(argc, argv);
1342     return 2;
1343   }
1344 
1345   // Remove duplicate spaces in the input file if requested.
1346   // Remove DOS style line endings.
1347   std::unique_ptr<MemoryBuffer> F =
1348       CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1349 
1350   // Check that we have all of the expected strings, in order, in the input
1351   // file.
1352   StringRef Buffer = F->getBuffer();
1353 
1354   SM.AddNewSourceBuffer(std::move(F), SMLoc());
1355 
1356   /// VariableTable - This holds all the current filecheck variables.
1357   StringMap<StringRef> VariableTable;
1358 
1359   bool hasError = false;
1360 
1361   unsigned i = 0, j = 0, e = CheckStrings.size();
1362 
1363   while (true) {
1364     StringRef CheckRegion;
1365     if (j == e) {
1366       CheckRegion = Buffer;
1367     } else {
1368       const CheckString &CheckLabelStr = CheckStrings[j];
1369       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1370         ++j;
1371         continue;
1372       }
1373 
1374       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1375       size_t MatchLabelLen = 0;
1376       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1377                                                  MatchLabelLen, VariableTable);
1378       if (MatchLabelPos == StringRef::npos) {
1379         hasError = true;
1380         break;
1381       }
1382 
1383       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1384       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1385       ++j;
1386     }
1387 
1388     for ( ; i != j; ++i) {
1389       const CheckString &CheckStr = CheckStrings[i];
1390 
1391       // Check each string within the scanned region, including a second check
1392       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1393       size_t MatchLen = 0;
1394       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1395                                        VariableTable);
1396 
1397       if (MatchPos == StringRef::npos) {
1398         hasError = true;
1399         i = j;
1400         break;
1401       }
1402 
1403       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1404     }
1405 
1406     if (j == e)
1407       break;
1408   }
1409 
1410   return hasError ? 1 : 0;
1411 }
1412