xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 614c986175872eefadaaac94919670bcf18acf4b)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an exit status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/InitLLVM.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cctype>
31 #include <map>
32 #include <string>
33 #include <system_error>
34 #include <vector>
35 using namespace llvm;
36 
37 static cl::opt<std::string>
38     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
39 
40 static cl::opt<std::string>
41     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
42                   cl::init("-"), cl::value_desc("filename"));
43 
44 static cl::list<std::string> CheckPrefixes(
45     "check-prefix",
46     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
47 static cl::alias CheckPrefixesAlias(
48     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
49     cl::NotHidden,
50     cl::desc(
51         "Alias for -check-prefix permitting multiple comma separated values"));
52 
53 static cl::opt<bool> NoCanonicalizeWhiteSpace(
54     "strict-whitespace",
55     cl::desc("Do not treat all horizontal whitespace as equivalent"));
56 
57 static cl::list<std::string> ImplicitCheckNot(
58     "implicit-check-not",
59     cl::desc("Add an implicit negative check with this pattern to every\n"
60              "positive check. This can be used to ensure that no instances of\n"
61              "this pattern occur which are not matched by a positive pattern"),
62     cl::value_desc("pattern"));
63 
64 static cl::list<std::string> GlobalDefines("D", cl::Prefix,
65     cl::desc("Define a variable to be used in capture patterns."),
66     cl::value_desc("VAR=VALUE"));
67 
68 static cl::opt<bool> AllowEmptyInput(
69     "allow-empty", cl::init(false),
70     cl::desc("Allow the input file to be empty. This is useful when making\n"
71              "checks that some error message does not occur, for example."));
72 
73 static cl::opt<bool> MatchFullLines(
74     "match-full-lines", cl::init(false),
75     cl::desc("Require all positive matches to cover an entire input line.\n"
76              "Allows leading and trailing whitespace if --strict-whitespace\n"
77              "is not also passed."));
78 
79 static cl::opt<bool> EnableVarScope(
80     "enable-var-scope", cl::init(false),
81     cl::desc("Enables scope for regex variables. Variables with names that\n"
82              "do not start with '$' will be reset at the beginning of\n"
83              "each CHECK-LABEL block."));
84 
85 typedef cl::list<std::string>::const_iterator prefix_iterator;
86 
87 //===----------------------------------------------------------------------===//
88 // Pattern Handling Code.
89 //===----------------------------------------------------------------------===//
90 
91 namespace Check {
92 enum CheckType {
93   CheckNone = 0,
94   CheckPlain,
95   CheckNext,
96   CheckSame,
97   CheckNot,
98   CheckDAG,
99   CheckLabel,
100   CheckEmpty,
101 
102   /// Indicates the pattern only matches the end of file. This is used for
103   /// trailing CHECK-NOTs.
104   CheckEOF,
105 
106   /// Marks when parsing found a -NOT check combined with another CHECK suffix.
107   CheckBadNot
108 };
109 }
110 
111 class Pattern {
112   SMLoc PatternLoc;
113 
114   /// A fixed string to match as the pattern or empty if this pattern requires
115   /// a regex match.
116   StringRef FixedStr;
117 
118   /// A regex string to match as the pattern or empty if this pattern requires
119   /// a fixed string to match.
120   std::string RegExStr;
121 
122   /// Entries in this vector map to uses of a variable in the pattern, e.g.
123   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
124   /// we'll get an entry in this vector that tells us to insert the value of
125   /// bar at offset 3.
126   std::vector<std::pair<StringRef, unsigned>> VariableUses;
127 
128   /// Maps definitions of variables to their parenthesized capture numbers.
129   ///
130   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
131   /// 1.
132   std::map<StringRef, unsigned> VariableDefs;
133 
134   Check::CheckType CheckTy;
135 
136   /// Contains the number of line this pattern is in.
137   unsigned LineNumber;
138 
139 public:
140   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
141 
142   /// Returns the location in source code.
143   SMLoc getLoc() const { return PatternLoc; }
144 
145   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
146                     unsigned LineNumber);
147   size_t Match(StringRef Buffer, size_t &MatchLen,
148                StringMap<StringRef> &VariableTable) const;
149   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
150                         const StringMap<StringRef> &VariableTable) const;
151 
152   bool hasVariable() const {
153     return !(VariableUses.empty() && VariableDefs.empty());
154   }
155 
156   Check::CheckType getCheckTy() const { return CheckTy; }
157 
158 private:
159   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
160   void AddBackrefToRegEx(unsigned BackrefNum);
161   unsigned
162   ComputeMatchDistance(StringRef Buffer,
163                        const StringMap<StringRef> &VariableTable) const;
164   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
165   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
166 };
167 
168 /// Parses the given string into the Pattern.
169 ///
170 /// \p Prefix provides which prefix is being matched, \p SM provides the
171 /// SourceMgr used for error reports, and \p LineNumber is the line number in
172 /// the input file from which the pattern string was read. Returns true in
173 /// case of an error, false otherwise.
174 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
175                            SourceMgr &SM, unsigned LineNumber) {
176   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
177 
178   this->LineNumber = LineNumber;
179   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
180 
181   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
182     // Ignore trailing whitespace.
183     while (!PatternStr.empty() &&
184            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
185       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
186 
187   // Check that there is something on the line.
188   if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
189     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
190                     "found empty check string with prefix '" + Prefix + ":'");
191     return true;
192   }
193 
194   if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
195     SM.PrintMessage(
196         PatternLoc, SourceMgr::DK_Error,
197         "found non-empty check string for empty check with prefix '" + Prefix +
198             ":'");
199     return true;
200   }
201 
202   if (CheckTy == Check::CheckEmpty) {
203     RegExStr = "(\n$)";
204     return false;
205   }
206 
207   // Check to see if this is a fixed string, or if it has regex pieces.
208   if (!MatchFullLinesHere &&
209       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
210                                  PatternStr.find("[[") == StringRef::npos))) {
211     FixedStr = PatternStr;
212     return false;
213   }
214 
215   if (MatchFullLinesHere) {
216     RegExStr += '^';
217     if (!NoCanonicalizeWhiteSpace)
218       RegExStr += " *";
219   }
220 
221   // Paren value #0 is for the fully matched string.  Any new parenthesized
222   // values add from there.
223   unsigned CurParen = 1;
224 
225   // Otherwise, there is at least one regex piece.  Build up the regex pattern
226   // by escaping scary characters in fixed strings, building up one big regex.
227   while (!PatternStr.empty()) {
228     // RegEx matches.
229     if (PatternStr.startswith("{{")) {
230       // This is the start of a regex match.  Scan for the }}.
231       size_t End = PatternStr.find("}}");
232       if (End == StringRef::npos) {
233         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
234                         SourceMgr::DK_Error,
235                         "found start of regex string with no end '}}'");
236         return true;
237       }
238 
239       // Enclose {{}} patterns in parens just like [[]] even though we're not
240       // capturing the result for any purpose.  This is required in case the
241       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
242       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
243       RegExStr += '(';
244       ++CurParen;
245 
246       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
247         return true;
248       RegExStr += ')';
249 
250       PatternStr = PatternStr.substr(End + 2);
251       continue;
252     }
253 
254     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
255     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
256     // second form is [[foo]] which is a reference to foo.  The variable name
257     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
258     // it.  This is to catch some common errors.
259     if (PatternStr.startswith("[[")) {
260       // Find the closing bracket pair ending the match.  End is going to be an
261       // offset relative to the beginning of the match string.
262       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
263 
264       if (End == StringRef::npos) {
265         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
266                         SourceMgr::DK_Error,
267                         "invalid named regex reference, no ]] found");
268         return true;
269       }
270 
271       StringRef MatchStr = PatternStr.substr(2, End);
272       PatternStr = PatternStr.substr(End + 4);
273 
274       // Get the regex name (e.g. "foo").
275       size_t NameEnd = MatchStr.find(':');
276       StringRef Name = MatchStr.substr(0, NameEnd);
277 
278       if (Name.empty()) {
279         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
280                         "invalid name in named regex: empty name");
281         return true;
282       }
283 
284       // Verify that the name/expression is well formed. FileCheck currently
285       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
286       // is relaxed, more strict check is performed in \c EvaluateExpression.
287       bool IsExpression = false;
288       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
289         if (i == 0) {
290           if (Name[i] == '$')  // Global vars start with '$'
291             continue;
292           if (Name[i] == '@') {
293             if (NameEnd != StringRef::npos) {
294               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
295                               SourceMgr::DK_Error,
296                               "invalid name in named regex definition");
297               return true;
298             }
299             IsExpression = true;
300             continue;
301           }
302         }
303         if (Name[i] != '_' && !isalnum(Name[i]) &&
304             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
305           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
306                           SourceMgr::DK_Error, "invalid name in named regex");
307           return true;
308         }
309       }
310 
311       // Name can't start with a digit.
312       if (isdigit(static_cast<unsigned char>(Name[0]))) {
313         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
314                         "invalid name in named regex");
315         return true;
316       }
317 
318       // Handle [[foo]].
319       if (NameEnd == StringRef::npos) {
320         // Handle variables that were defined earlier on the same line by
321         // emitting a backreference.
322         if (VariableDefs.find(Name) != VariableDefs.end()) {
323           unsigned VarParenNum = VariableDefs[Name];
324           if (VarParenNum < 1 || VarParenNum > 9) {
325             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
326                             SourceMgr::DK_Error,
327                             "Can't back-reference more than 9 variables");
328             return true;
329           }
330           AddBackrefToRegEx(VarParenNum);
331         } else {
332           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
333         }
334         continue;
335       }
336 
337       // Handle [[foo:.*]].
338       VariableDefs[Name] = CurParen;
339       RegExStr += '(';
340       ++CurParen;
341 
342       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
343         return true;
344 
345       RegExStr += ')';
346     }
347 
348     // Handle fixed string matches.
349     // Find the end, which is the start of the next regex.
350     size_t FixedMatchEnd = PatternStr.find("{{");
351     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
352     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
353     PatternStr = PatternStr.substr(FixedMatchEnd);
354   }
355 
356   if (MatchFullLinesHere) {
357     if (!NoCanonicalizeWhiteSpace)
358       RegExStr += " *";
359     RegExStr += '$';
360   }
361 
362   return false;
363 }
364 
365 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
366   Regex R(RS);
367   std::string Error;
368   if (!R.isValid(Error)) {
369     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
370                     "invalid regex: " + Error);
371     return true;
372   }
373 
374   RegExStr += RS.str();
375   CurParen += R.getNumMatches();
376   return false;
377 }
378 
379 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
380   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
381   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
382   RegExStr += Backref;
383 }
384 
385 /// Evaluates expression and stores the result to \p Value.
386 ///
387 /// Returns true on success and false when the expression has invalid syntax.
388 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
389   // The only supported expression is @LINE([\+-]\d+)?
390   if (!Expr.startswith("@LINE"))
391     return false;
392   Expr = Expr.substr(StringRef("@LINE").size());
393   int Offset = 0;
394   if (!Expr.empty()) {
395     if (Expr[0] == '+')
396       Expr = Expr.substr(1);
397     else if (Expr[0] != '-')
398       return false;
399     if (Expr.getAsInteger(10, Offset))
400       return false;
401   }
402   Value = llvm::itostr(LineNumber + Offset);
403   return true;
404 }
405 
406 /// Matches the pattern string against the input buffer \p Buffer
407 ///
408 /// This returns the position that is matched or npos if there is no match. If
409 /// there is a match, the size of the matched string is returned in \p
410 /// MatchLen.
411 ///
412 /// The \p VariableTable StringMap provides the current values of filecheck
413 /// variables and is updated if this match defines new values.
414 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
415                       StringMap<StringRef> &VariableTable) const {
416   // If this is the EOF pattern, match it immediately.
417   if (CheckTy == Check::CheckEOF) {
418     MatchLen = 0;
419     return Buffer.size();
420   }
421 
422   // If this is a fixed string pattern, just match it now.
423   if (!FixedStr.empty()) {
424     MatchLen = FixedStr.size();
425     return Buffer.find(FixedStr);
426   }
427 
428   // Regex match.
429 
430   // If there are variable uses, we need to create a temporary string with the
431   // actual value.
432   StringRef RegExToMatch = RegExStr;
433   std::string TmpStr;
434   if (!VariableUses.empty()) {
435     TmpStr = RegExStr;
436 
437     unsigned InsertOffset = 0;
438     for (const auto &VariableUse : VariableUses) {
439       std::string Value;
440 
441       if (VariableUse.first[0] == '@') {
442         if (!EvaluateExpression(VariableUse.first, Value))
443           return StringRef::npos;
444       } else {
445         StringMap<StringRef>::iterator it =
446             VariableTable.find(VariableUse.first);
447         // If the variable is undefined, return an error.
448         if (it == VariableTable.end())
449           return StringRef::npos;
450 
451         // Look up the value and escape it so that we can put it into the regex.
452         Value += Regex::escape(it->second);
453       }
454 
455       // Plop it into the regex at the adjusted offset.
456       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
457                     Value.begin(), Value.end());
458       InsertOffset += Value.size();
459     }
460 
461     // Match the newly constructed regex.
462     RegExToMatch = TmpStr;
463   }
464 
465   SmallVector<StringRef, 4> MatchInfo;
466   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
467     return StringRef::npos;
468 
469   // Successful regex match.
470   assert(!MatchInfo.empty() && "Didn't get any match");
471   StringRef FullMatch = MatchInfo[0];
472 
473   // If this defines any variables, remember their values.
474   for (const auto &VariableDef : VariableDefs) {
475     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
476     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
477   }
478 
479   MatchLen = FullMatch.size();
480   return FullMatch.data() - Buffer.data();
481 }
482 
483 
484 /// Computes an arbitrary estimate for the quality of matching this pattern at
485 /// the start of \p Buffer; a distance of zero should correspond to a perfect
486 /// match.
487 unsigned
488 Pattern::ComputeMatchDistance(StringRef Buffer,
489                               const StringMap<StringRef> &VariableTable) const {
490   // Just compute the number of matching characters. For regular expressions, we
491   // just compare against the regex itself and hope for the best.
492   //
493   // FIXME: One easy improvement here is have the regex lib generate a single
494   // example regular expression which matches, and use that as the example
495   // string.
496   StringRef ExampleString(FixedStr);
497   if (ExampleString.empty())
498     ExampleString = RegExStr;
499 
500   // Only compare up to the first line in the buffer, or the string size.
501   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
502   BufferPrefix = BufferPrefix.split('\n').first;
503   return BufferPrefix.edit_distance(ExampleString);
504 }
505 
506 /// Prints additional information about a failure to match involving this
507 /// pattern.
508 void Pattern::PrintFailureInfo(
509     const SourceMgr &SM, StringRef Buffer,
510     const StringMap<StringRef> &VariableTable) const {
511   // If this was a regular expression using variables, print the current
512   // variable values.
513   if (!VariableUses.empty()) {
514     for (const auto &VariableUse : VariableUses) {
515       SmallString<256> Msg;
516       raw_svector_ostream OS(Msg);
517       StringRef Var = VariableUse.first;
518       if (Var[0] == '@') {
519         std::string Value;
520         if (EvaluateExpression(Var, Value)) {
521           OS << "with expression \"";
522           OS.write_escaped(Var) << "\" equal to \"";
523           OS.write_escaped(Value) << "\"";
524         } else {
525           OS << "uses incorrect expression \"";
526           OS.write_escaped(Var) << "\"";
527         }
528       } else {
529         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
530 
531         // Check for undefined variable references.
532         if (it == VariableTable.end()) {
533           OS << "uses undefined variable \"";
534           OS.write_escaped(Var) << "\"";
535         } else {
536           OS << "with variable \"";
537           OS.write_escaped(Var) << "\" equal to \"";
538           OS.write_escaped(it->second) << "\"";
539         }
540       }
541 
542       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
543                       OS.str());
544     }
545   }
546 
547   // Attempt to find the closest/best fuzzy match.  Usually an error happens
548   // because some string in the output didn't exactly match. In these cases, we
549   // would like to show the user a best guess at what "should have" matched, to
550   // save them having to actually check the input manually.
551   size_t NumLinesForward = 0;
552   size_t Best = StringRef::npos;
553   double BestQuality = 0;
554 
555   // Use an arbitrary 4k limit on how far we will search.
556   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
557     if (Buffer[i] == '\n')
558       ++NumLinesForward;
559 
560     // Patterns have leading whitespace stripped, so skip whitespace when
561     // looking for something which looks like a pattern.
562     if (Buffer[i] == ' ' || Buffer[i] == '\t')
563       continue;
564 
565     // Compute the "quality" of this match as an arbitrary combination of the
566     // match distance and the number of lines skipped to get to this match.
567     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
568     double Quality = Distance + (NumLinesForward / 100.);
569 
570     if (Quality < BestQuality || Best == StringRef::npos) {
571       Best = i;
572       BestQuality = Quality;
573     }
574   }
575 
576   // Print the "possible intended match here" line if we found something
577   // reasonable and not equal to what we showed in the "scanning from here"
578   // line.
579   if (Best && Best != StringRef::npos && BestQuality < 50) {
580     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
581                     SourceMgr::DK_Note, "possible intended match here");
582 
583     // FIXME: If we wanted to be really friendly we would show why the match
584     // failed, as it can be hard to spot simple one character differences.
585   }
586 }
587 
588 /// Finds the closing sequence of a regex variable usage or definition.
589 ///
590 /// \p Str has to point in the beginning of the definition (right after the
591 /// opening sequence). Returns the offset of the closing sequence within Str,
592 /// or npos if it was not found.
593 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
594   // Offset keeps track of the current offset within the input Str
595   size_t Offset = 0;
596   // [...] Nesting depth
597   size_t BracketDepth = 0;
598 
599   while (!Str.empty()) {
600     if (Str.startswith("]]") && BracketDepth == 0)
601       return Offset;
602     if (Str[0] == '\\') {
603       // Backslash escapes the next char within regexes, so skip them both.
604       Str = Str.substr(2);
605       Offset += 2;
606     } else {
607       switch (Str[0]) {
608       default:
609         break;
610       case '[':
611         BracketDepth++;
612         break;
613       case ']':
614         if (BracketDepth == 0) {
615           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
616                           SourceMgr::DK_Error,
617                           "missing closing \"]\" for regex variable");
618           exit(1);
619         }
620         BracketDepth--;
621         break;
622       }
623       Str = Str.substr(1);
624       Offset++;
625     }
626   }
627 
628   return StringRef::npos;
629 }
630 
631 //===----------------------------------------------------------------------===//
632 // Check Strings.
633 //===----------------------------------------------------------------------===//
634 
635 /// A check that we found in the input file.
636 struct CheckString {
637   /// The pattern to match.
638   Pattern Pat;
639 
640   /// Which prefix name this check matched.
641   StringRef Prefix;
642 
643   /// The location in the match file that the check string was specified.
644   SMLoc Loc;
645 
646   /// All of the strings that are disallowed from occurring between this match
647   /// string and the previous one (or start of file).
648   std::vector<Pattern> DagNotStrings;
649 
650   CheckString(const Pattern &P, StringRef S, SMLoc L)
651       : Pat(P), Prefix(S), Loc(L) {}
652 
653   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
654                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
655 
656   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
657   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
658   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
659                 const std::vector<const Pattern *> &NotStrings,
660                 StringMap<StringRef> &VariableTable) const;
661   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
662                   std::vector<const Pattern *> &NotStrings,
663                   StringMap<StringRef> &VariableTable) const;
664 };
665 
666 /// Canonicalize whitespaces in the file. Line endings are replaced with
667 /// UNIX-style '\n'.
668 static StringRef CanonicalizeFile(MemoryBuffer &MB,
669                                   SmallVectorImpl<char> &OutputBuffer) {
670   OutputBuffer.reserve(MB.getBufferSize());
671 
672   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
673        Ptr != End; ++Ptr) {
674     // Eliminate trailing dosish \r.
675     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
676       continue;
677     }
678 
679     // If current char is not a horizontal whitespace or if horizontal
680     // whitespace canonicalization is disabled, dump it to output as is.
681     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
682       OutputBuffer.push_back(*Ptr);
683       continue;
684     }
685 
686     // Otherwise, add one space and advance over neighboring space.
687     OutputBuffer.push_back(' ');
688     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
689       ++Ptr;
690   }
691 
692   // Add a null byte and then return all but that byte.
693   OutputBuffer.push_back('\0');
694   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
695 }
696 
697 static bool IsPartOfWord(char c) {
698   return (isalnum(c) || c == '-' || c == '_');
699 }
700 
701 // Get the size of the prefix extension.
702 static size_t CheckTypeSize(Check::CheckType Ty) {
703   switch (Ty) {
704   case Check::CheckNone:
705   case Check::CheckBadNot:
706     return 0;
707 
708   case Check::CheckPlain:
709     return sizeof(":") - 1;
710 
711   case Check::CheckNext:
712     return sizeof("-NEXT:") - 1;
713 
714   case Check::CheckSame:
715     return sizeof("-SAME:") - 1;
716 
717   case Check::CheckNot:
718     return sizeof("-NOT:") - 1;
719 
720   case Check::CheckDAG:
721     return sizeof("-DAG:") - 1;
722 
723   case Check::CheckLabel:
724     return sizeof("-LABEL:") - 1;
725 
726   case Check::CheckEmpty:
727     return sizeof("-EMPTY:") - 1;
728 
729   case Check::CheckEOF:
730     llvm_unreachable("Should not be using EOF size");
731   }
732 
733   llvm_unreachable("Bad check type");
734 }
735 
736 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
737   if (Buffer.size() <= Prefix.size())
738     return Check::CheckNone;
739 
740   char NextChar = Buffer[Prefix.size()];
741 
742   // Verify that the : is present after the prefix.
743   if (NextChar == ':')
744     return Check::CheckPlain;
745 
746   if (NextChar != '-')
747     return Check::CheckNone;
748 
749   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
750   if (Rest.startswith("NEXT:"))
751     return Check::CheckNext;
752 
753   if (Rest.startswith("SAME:"))
754     return Check::CheckSame;
755 
756   if (Rest.startswith("NOT:"))
757     return Check::CheckNot;
758 
759   if (Rest.startswith("DAG:"))
760     return Check::CheckDAG;
761 
762   if (Rest.startswith("LABEL:"))
763     return Check::CheckLabel;
764 
765   if (Rest.startswith("EMPTY:"))
766     return Check::CheckEmpty;
767 
768   // You can't combine -NOT with another suffix.
769   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
770       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
771       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
772       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
773     return Check::CheckBadNot;
774 
775   return Check::CheckNone;
776 }
777 
778 // From the given position, find the next character after the word.
779 static size_t SkipWord(StringRef Str, size_t Loc) {
780   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
781     ++Loc;
782   return Loc;
783 }
784 
785 /// Search the buffer for the first prefix in the prefix regular expression.
786 ///
787 /// This searches the buffer using the provided regular expression, however it
788 /// enforces constraints beyond that:
789 /// 1) The found prefix must not be a suffix of something that looks like
790 ///    a valid prefix.
791 /// 2) The found prefix must be followed by a valid check type suffix using \c
792 ///    FindCheckType above.
793 ///
794 /// The first match of the regular expression to satisfy these two is returned,
795 /// otherwise an empty StringRef is returned to indicate failure.
796 ///
797 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
798 /// start at the beginning of the returned prefix, increment \p LineNumber for
799 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
800 /// check found by examining the suffix.
801 ///
802 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
803 /// is unspecified.
804 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
805                                          unsigned &LineNumber,
806                                          Check::CheckType &CheckTy) {
807   SmallVector<StringRef, 2> Matches;
808 
809   while (!Buffer.empty()) {
810     // Find the first (longest) match using the RE.
811     if (!PrefixRE.match(Buffer, &Matches))
812       // No match at all, bail.
813       return StringRef();
814 
815     StringRef Prefix = Matches[0];
816     Matches.clear();
817 
818     assert(Prefix.data() >= Buffer.data() &&
819            Prefix.data() < Buffer.data() + Buffer.size() &&
820            "Prefix doesn't start inside of buffer!");
821     size_t Loc = Prefix.data() - Buffer.data();
822     StringRef Skipped = Buffer.substr(0, Loc);
823     Buffer = Buffer.drop_front(Loc);
824     LineNumber += Skipped.count('\n');
825 
826     // Check that the matched prefix isn't a suffix of some other check-like
827     // word.
828     // FIXME: This is a very ad-hoc check. it would be better handled in some
829     // other way. Among other things it seems hard to distinguish between
830     // intentional and unintentional uses of this feature.
831     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
832       // Now extract the type.
833       CheckTy = FindCheckType(Buffer, Prefix);
834 
835       // If we've found a valid check type for this prefix, we're done.
836       if (CheckTy != Check::CheckNone)
837         return Prefix;
838     }
839 
840     // If we didn't successfully find a prefix, we need to skip this invalid
841     // prefix and continue scanning. We directly skip the prefix that was
842     // matched and any additional parts of that check-like word.
843     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
844   }
845 
846   // We ran out of buffer while skipping partial matches so give up.
847   return StringRef();
848 }
849 
850 /// Read the check file, which specifies the sequence of expected strings.
851 ///
852 /// The strings are added to the CheckStrings vector. Returns true in case of
853 /// an error, false otherwise.
854 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
855                           std::vector<CheckString> &CheckStrings) {
856   std::vector<Pattern> ImplicitNegativeChecks;
857   for (const auto &PatternString : ImplicitCheckNot) {
858     // Create a buffer with fake command line content in order to display the
859     // command line option responsible for the specific implicit CHECK-NOT.
860     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
861     std::string Suffix = "'";
862     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
863         Prefix + PatternString + Suffix, "command line");
864 
865     StringRef PatternInBuffer =
866         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
867     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
868 
869     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
870     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
871                                                "IMPLICIT-CHECK", SM, 0);
872   }
873 
874   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
875 
876   // LineNumber keeps track of the line on which CheckPrefix instances are
877   // found.
878   unsigned LineNumber = 1;
879 
880   while (1) {
881     Check::CheckType CheckTy;
882 
883     // See if a prefix occurs in the memory buffer.
884     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
885                                                    CheckTy);
886     if (UsedPrefix.empty())
887       break;
888     assert(UsedPrefix.data() == Buffer.data() &&
889            "Failed to move Buffer's start forward, or pointed prefix outside "
890            "of the buffer!");
891 
892     // Location to use for error messages.
893     const char *UsedPrefixStart = UsedPrefix.data();
894 
895     // Skip the buffer to the end.
896     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
897 
898     // Complain about useful-looking but unsupported suffixes.
899     if (CheckTy == Check::CheckBadNot) {
900       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
901                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
902       return true;
903     }
904 
905     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
906     // leading whitespace.
907     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
908       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
909 
910     // Scan ahead to the end of line.
911     size_t EOL = Buffer.find_first_of("\n\r");
912 
913     // Remember the location of the start of the pattern, for diagnostics.
914     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
915 
916     // Parse the pattern.
917     Pattern P(CheckTy);
918     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
919       return true;
920 
921     // Verify that CHECK-LABEL lines do not define or use variables
922     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
923       SM.PrintMessage(
924           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
925           "found '" + UsedPrefix + "-LABEL:'"
926                                    " with variable definition or use");
927       return true;
928     }
929 
930     Buffer = Buffer.substr(EOL);
931 
932     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
933     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
934          CheckTy == Check::CheckEmpty) &&
935         CheckStrings.empty()) {
936       StringRef Type = CheckTy == Check::CheckNext
937                            ? "NEXT"
938                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
939       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
940                       SourceMgr::DK_Error,
941                       "found '" + UsedPrefix + "-" + Type +
942                           "' without previous '" + UsedPrefix + ": line");
943       return true;
944     }
945 
946     // Handle CHECK-DAG/-NOT.
947     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
948       DagNotMatches.push_back(P);
949       continue;
950     }
951 
952     // Okay, add the string we captured to the output vector and move on.
953     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
954     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
955     DagNotMatches = ImplicitNegativeChecks;
956   }
957 
958   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
959   // prefix as a filler for the error message.
960   if (!DagNotMatches.empty()) {
961     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
962                               SMLoc::getFromPointer(Buffer.data()));
963     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
964   }
965 
966   if (CheckStrings.empty()) {
967     errs() << "error: no check strings found with prefix"
968            << (CheckPrefixes.size() > 1 ? "es " : " ");
969     prefix_iterator I = CheckPrefixes.begin();
970     prefix_iterator E = CheckPrefixes.end();
971     if (I != E) {
972       errs() << "\'" << *I << ":'";
973       ++I;
974     }
975     for (; I != E; ++I)
976       errs() << ", \'" << *I << ":'";
977 
978     errs() << '\n';
979     return true;
980   }
981 
982   return false;
983 }
984 
985 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
986                              StringRef Buffer,
987                              StringMap<StringRef> &VariableTable) {
988   // Otherwise, we have an error, emit an error message.
989   SM.PrintMessage(Loc, SourceMgr::DK_Error,
990                   "expected string not found in input");
991 
992   // Print the "scanning from here" line.  If the current position is at the
993   // end of a line, advance to the start of the next line.
994   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
995 
996   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
997                   "scanning from here");
998 
999   // Allow the pattern to print additional information if desired.
1000   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1001 }
1002 
1003 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1004                              StringRef Buffer,
1005                              StringMap<StringRef> &VariableTable) {
1006   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1007 }
1008 
1009 /// Count the number of newlines in the specified range.
1010 static unsigned CountNumNewlinesBetween(StringRef Range,
1011                                         const char *&FirstNewLine) {
1012   unsigned NumNewLines = 0;
1013   while (1) {
1014     // Scan for newline.
1015     Range = Range.substr(Range.find_first_of("\n\r"));
1016     if (Range.empty())
1017       return NumNewLines;
1018 
1019     ++NumNewLines;
1020 
1021     // Handle \n\r and \r\n as a single newline.
1022     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1023         (Range[0] != Range[1]))
1024       Range = Range.substr(1);
1025     Range = Range.substr(1);
1026 
1027     if (NumNewLines == 1)
1028       FirstNewLine = Range.begin();
1029   }
1030 }
1031 
1032 /// Match check string and its "not strings" and/or "dag strings".
1033 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1034                           bool IsLabelScanMode, size_t &MatchLen,
1035                           StringMap<StringRef> &VariableTable) const {
1036   size_t LastPos = 0;
1037   std::vector<const Pattern *> NotStrings;
1038 
1039   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1040   // bounds; we have not processed variable definitions within the bounded block
1041   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1042   // over the block again (including the last CHECK-LABEL) in normal mode.
1043   if (!IsLabelScanMode) {
1044     // Match "dag strings" (with mixed "not strings" if any).
1045     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1046     if (LastPos == StringRef::npos)
1047       return StringRef::npos;
1048   }
1049 
1050   // Match itself from the last position after matching CHECK-DAG.
1051   StringRef MatchBuffer = Buffer.substr(LastPos);
1052   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1053   if (MatchPos == StringRef::npos) {
1054     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1055     return StringRef::npos;
1056   }
1057 
1058   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1059   // or CHECK-NOT
1060   if (!IsLabelScanMode) {
1061     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1062 
1063     // If this check is a "CHECK-NEXT", verify that the previous match was on
1064     // the previous line (i.e. that there is one newline between them).
1065     if (CheckNext(SM, SkippedRegion))
1066       return StringRef::npos;
1067 
1068     // If this check is a "CHECK-SAME", verify that the previous match was on
1069     // the same line (i.e. that there is no newline between them).
1070     if (CheckSame(SM, SkippedRegion))
1071       return StringRef::npos;
1072 
1073     // If this match had "not strings", verify that they don't exist in the
1074     // skipped region.
1075     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1076       return StringRef::npos;
1077   }
1078 
1079   return LastPos + MatchPos;
1080 }
1081 
1082 /// Verify there is a single line in the given buffer.
1083 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1084   if (Pat.getCheckTy() != Check::CheckNext &&
1085       Pat.getCheckTy() != Check::CheckEmpty)
1086     return false;
1087 
1088   Twine CheckName =
1089       Prefix +
1090       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
1091 
1092   // Count the number of newlines between the previous match and this one.
1093   assert(Buffer.data() !=
1094              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1095                                     SMLoc::getFromPointer(Buffer.data())))
1096                  ->getBufferStart() &&
1097          "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
1098 
1099   const char *FirstNewLine = nullptr;
1100   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1101 
1102   // For CHECK-EMPTY, the preceding new line is consumed by the pattern, so
1103   // this needs to be re-added.
1104   if (Pat.getCheckTy() == Check::CheckEmpty)
1105     ++NumNewLines;
1106 
1107   if (NumNewLines == 0) {
1108     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1109                     CheckName + ": is on the same line as previous match");
1110     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1111                     "'next' match was here");
1112     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1113                     "previous match ended here");
1114     return true;
1115   }
1116 
1117   if (NumNewLines != 1) {
1118     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1119                     CheckName +
1120                         ": is not on the line after the previous match");
1121     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1122                     "'next' match was here");
1123     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1124                     "previous match ended here");
1125     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1126                     "non-matching line after previous match is here");
1127     return true;
1128   }
1129 
1130   return false;
1131 }
1132 
1133 /// Verify there is no newline in the given buffer.
1134 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1135   if (Pat.getCheckTy() != Check::CheckSame)
1136     return false;
1137 
1138   // Count the number of newlines between the previous match and this one.
1139   assert(Buffer.data() !=
1140              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1141                                     SMLoc::getFromPointer(Buffer.data())))
1142                  ->getBufferStart() &&
1143          "CHECK-SAME can't be the first check in a file");
1144 
1145   const char *FirstNewLine = nullptr;
1146   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1147 
1148   if (NumNewLines != 0) {
1149     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1150                     Prefix +
1151                         "-SAME: is not on the same line as the previous match");
1152     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1153                     "'next' match was here");
1154     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1155                     "previous match ended here");
1156     return true;
1157   }
1158 
1159   return false;
1160 }
1161 
1162 /// Verify there's no "not strings" in the given buffer.
1163 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1164                            const std::vector<const Pattern *> &NotStrings,
1165                            StringMap<StringRef> &VariableTable) const {
1166   for (const Pattern *Pat : NotStrings) {
1167     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1168 
1169     size_t MatchLen = 0;
1170     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1171 
1172     if (Pos == StringRef::npos)
1173       continue;
1174 
1175     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1176                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1177     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1178                     Prefix + "-NOT: pattern specified here");
1179     return true;
1180   }
1181 
1182   return false;
1183 }
1184 
1185 /// Match "dag strings" and their mixed "not strings".
1186 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1187                              std::vector<const Pattern *> &NotStrings,
1188                              StringMap<StringRef> &VariableTable) const {
1189   if (DagNotStrings.empty())
1190     return 0;
1191 
1192   size_t LastPos = 0;
1193   size_t StartPos = LastPos;
1194 
1195   for (const Pattern &Pat : DagNotStrings) {
1196     assert((Pat.getCheckTy() == Check::CheckDAG ||
1197             Pat.getCheckTy() == Check::CheckNot) &&
1198            "Invalid CHECK-DAG or CHECK-NOT!");
1199 
1200     if (Pat.getCheckTy() == Check::CheckNot) {
1201       NotStrings.push_back(&Pat);
1202       continue;
1203     }
1204 
1205     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1206 
1207     size_t MatchLen = 0, MatchPos;
1208 
1209     // CHECK-DAG always matches from the start.
1210     StringRef MatchBuffer = Buffer.substr(StartPos);
1211     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1212     // With a group of CHECK-DAGs, a single mismatching means the match on
1213     // that group of CHECK-DAGs fails immediately.
1214     if (MatchPos == StringRef::npos) {
1215       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1216       return StringRef::npos;
1217     }
1218     // Re-calc it as the offset relative to the start of the original string.
1219     MatchPos += StartPos;
1220 
1221     if (!NotStrings.empty()) {
1222       if (MatchPos < LastPos) {
1223         // Reordered?
1224         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1225                         SourceMgr::DK_Error,
1226                         Prefix + "-DAG: found a match of CHECK-DAG"
1227                                  " reordering across a CHECK-NOT");
1228         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1229                         SourceMgr::DK_Note,
1230                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1231                                  " is found here");
1232         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1233                         Prefix + "-NOT: the crossed pattern specified"
1234                                  " here");
1235         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1236                         Prefix + "-DAG: the reordered pattern specified"
1237                                  " here");
1238         return StringRef::npos;
1239       }
1240       // All subsequent CHECK-DAGs should be matched from the farthest
1241       // position of all precedent CHECK-DAGs (including this one.)
1242       StartPos = LastPos;
1243       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1244       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1245       // region.
1246       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1247       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1248         return StringRef::npos;
1249       // Clear "not strings".
1250       NotStrings.clear();
1251     }
1252 
1253     // Update the last position with CHECK-DAG matches.
1254     LastPos = std::max(MatchPos + MatchLen, LastPos);
1255   }
1256 
1257   return LastPos;
1258 }
1259 
1260 // A check prefix must contain only alphanumeric, hyphens and underscores.
1261 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1262   Regex Validator("^[a-zA-Z0-9_-]*$");
1263   return Validator.match(CheckPrefix);
1264 }
1265 
1266 static bool ValidateCheckPrefixes() {
1267   StringSet<> PrefixSet;
1268 
1269   for (StringRef Prefix : CheckPrefixes) {
1270     // Reject empty prefixes.
1271     if (Prefix == "")
1272       return false;
1273 
1274     if (!PrefixSet.insert(Prefix).second)
1275       return false;
1276 
1277     if (!ValidateCheckPrefix(Prefix))
1278       return false;
1279   }
1280 
1281   return true;
1282 }
1283 
1284 // Combines the check prefixes into a single regex so that we can efficiently
1285 // scan for any of the set.
1286 //
1287 // The semantics are that the longest-match wins which matches our regex
1288 // library.
1289 static Regex buildCheckPrefixRegex() {
1290   // I don't think there's a way to specify an initial value for cl::list,
1291   // so if nothing was specified, add the default
1292   if (CheckPrefixes.empty())
1293     CheckPrefixes.push_back("CHECK");
1294 
1295   // We already validated the contents of CheckPrefixes so just concatenate
1296   // them as alternatives.
1297   SmallString<32> PrefixRegexStr;
1298   for (StringRef Prefix : CheckPrefixes) {
1299     if (Prefix != CheckPrefixes.front())
1300       PrefixRegexStr.push_back('|');
1301 
1302     PrefixRegexStr.append(Prefix);
1303   }
1304 
1305   return Regex(PrefixRegexStr);
1306 }
1307 
1308 static void DumpCommandLine(int argc, char **argv) {
1309   errs() << "FileCheck command line: ";
1310   for (int I = 0; I < argc; I++)
1311     errs() << " " << argv[I];
1312   errs() << "\n";
1313 }
1314 
1315 // Remove local variables from \p VariableTable. Global variables
1316 // (start with '$') are preserved.
1317 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1318   SmallVector<StringRef, 16> LocalVars;
1319   for (const auto &Var : VariableTable)
1320     if (Var.first()[0] != '$')
1321       LocalVars.push_back(Var.first());
1322 
1323   for (const auto &Var : LocalVars)
1324     VariableTable.erase(Var);
1325 }
1326 
1327 /// Check the input to FileCheck provided in the \p Buffer against the \p
1328 /// CheckStrings read from the check file.
1329 ///
1330 /// Returns false if the input fails to satisfy the checks.
1331 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1332                 ArrayRef<CheckString> CheckStrings) {
1333   bool ChecksFailed = false;
1334 
1335   /// VariableTable - This holds all the current filecheck variables.
1336   StringMap<StringRef> VariableTable;
1337 
1338   for (const auto& Def : GlobalDefines)
1339     VariableTable.insert(StringRef(Def).split('='));
1340 
1341   unsigned i = 0, j = 0, e = CheckStrings.size();
1342   while (true) {
1343     StringRef CheckRegion;
1344     if (j == e) {
1345       CheckRegion = Buffer;
1346     } else {
1347       const CheckString &CheckLabelStr = CheckStrings[j];
1348       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1349         ++j;
1350         continue;
1351       }
1352 
1353       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1354       size_t MatchLabelLen = 0;
1355       size_t MatchLabelPos =
1356           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1357       if (MatchLabelPos == StringRef::npos)
1358         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1359         return false;
1360 
1361       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1362       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1363       ++j;
1364     }
1365 
1366     if (EnableVarScope)
1367       ClearLocalVars(VariableTable);
1368 
1369     for (; i != j; ++i) {
1370       const CheckString &CheckStr = CheckStrings[i];
1371 
1372       // Check each string within the scanned region, including a second check
1373       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1374       size_t MatchLen = 0;
1375       size_t MatchPos =
1376           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1377 
1378       if (MatchPos == StringRef::npos) {
1379         ChecksFailed = true;
1380         i = j;
1381         break;
1382       }
1383 
1384       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1385     }
1386 
1387     if (j == e)
1388       break;
1389   }
1390 
1391   // Success if no checks failed.
1392   return !ChecksFailed;
1393 }
1394 
1395 int main(int argc, char **argv) {
1396   InitLLVM X(argc, argv);
1397   cl::ParseCommandLineOptions(argc, argv);
1398 
1399   if (!ValidateCheckPrefixes()) {
1400     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1401               "start with a letter and contain only alphanumeric characters, "
1402               "hyphens and underscores\n";
1403     return 2;
1404   }
1405 
1406   Regex PrefixRE = buildCheckPrefixRegex();
1407   std::string REError;
1408   if (!PrefixRE.isValid(REError)) {
1409     errs() << "Unable to combine check-prefix strings into a prefix regular "
1410               "expression! This is likely a bug in FileCheck's verification of "
1411               "the check-prefix strings. Regular expression parsing failed "
1412               "with the following error: "
1413            << REError << "\n";
1414     return 2;
1415   }
1416 
1417   SourceMgr SM;
1418 
1419   // Read the expected strings from the check file.
1420   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1421       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1422   if (std::error_code EC = CheckFileOrErr.getError()) {
1423     errs() << "Could not open check file '" << CheckFilename
1424            << "': " << EC.message() << '\n';
1425     return 2;
1426   }
1427   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1428 
1429   SmallString<4096> CheckFileBuffer;
1430   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1431 
1432   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1433                             CheckFileText, CheckFile.getBufferIdentifier()),
1434                         SMLoc());
1435 
1436   std::vector<CheckString> CheckStrings;
1437   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1438     return 2;
1439 
1440   // Open the file to check and add it to SourceMgr.
1441   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1442       MemoryBuffer::getFileOrSTDIN(InputFilename);
1443   if (std::error_code EC = InputFileOrErr.getError()) {
1444     errs() << "Could not open input file '" << InputFilename
1445            << "': " << EC.message() << '\n';
1446     return 2;
1447   }
1448   MemoryBuffer &InputFile = *InputFileOrErr.get();
1449 
1450   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1451     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1452     DumpCommandLine(argc, argv);
1453     return 2;
1454   }
1455 
1456   SmallString<4096> InputFileBuffer;
1457   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1458 
1459   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1460                             InputFileText, InputFile.getBufferIdentifier()),
1461                         SMLoc());
1462 
1463   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1464 }
1465