xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 46e1fd61021ff06966e672cb38cba0278b9d0b0d)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an exit status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43                   cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string> CheckPrefixes(
46     "check-prefix",
47     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool> NoCanonicalizeWhiteSpace(
55     "strict-whitespace",
56     cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::list<std::string> GlobalDefines("D", cl::Prefix,
66     cl::desc("Define a variable to be used in capture patterns."),
67     cl::value_desc("VAR=VALUE"));
68 
69 static cl::opt<bool> AllowEmptyInput(
70     "allow-empty", cl::init(false),
71     cl::desc("Allow the input file to be empty. This is useful when making\n"
72              "checks that some error message does not occur, for example."));
73 
74 static cl::opt<bool> MatchFullLines(
75     "match-full-lines", cl::init(false),
76     cl::desc("Require all positive matches to cover an entire input line.\n"
77              "Allows leading and trailing whitespace if --strict-whitespace\n"
78              "is not also passed."));
79 
80 static cl::opt<bool> EnableVarScope(
81     "enable-var-scope", cl::init(false),
82     cl::desc("Enables scope for regex variables. Variables with names that\n"
83              "do not start with '$' will be reset at the beginning of\n"
84              "each CHECK-LABEL block."));
85 
86 typedef cl::list<std::string>::const_iterator prefix_iterator;
87 
88 //===----------------------------------------------------------------------===//
89 // Pattern Handling Code.
90 //===----------------------------------------------------------------------===//
91 
namespace Check {
/// The kind of directive a check line carries, derived from the text that
/// follows the check prefix.
enum CheckType {
  /// No valid directive suffix follows the prefix.
  CheckNone = 0,
  /// "PREFIX:"
  CheckPlain,
  /// "PREFIX-NEXT:"
  CheckNext,
  /// "PREFIX-SAME:"
  CheckSame,
  /// "PREFIX-NOT:"
  CheckNot,
  /// "PREFIX-DAG:"
  CheckDAG,
  /// "PREFIX-LABEL:"
  CheckLabel,

  /// A pattern that only matches the end of the file; it is used for
  /// trailing CHECK-NOTs.
  CheckEOF,

  /// Set when parsing encounters -NOT combined with another CHECK suffix.
  CheckBadNot
};
} // namespace Check
110 
111 class Pattern {
112   SMLoc PatternLoc;
113 
114   /// A fixed string to match as the pattern or empty if this pattern requires
115   /// a regex match.
116   StringRef FixedStr;
117 
118   /// A regex string to match as the pattern or empty if this pattern requires
119   /// a fixed string to match.
120   std::string RegExStr;
121 
122   /// Entries in this vector map to uses of a variable in the pattern, e.g.
123   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
124   /// we'll get an entry in this vector that tells us to insert the value of
125   /// bar at offset 3.
126   std::vector<std::pair<StringRef, unsigned>> VariableUses;
127 
128   /// Maps definitions of variables to their parenthesized capture numbers.
129   ///
130   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
131   /// 1.
132   std::map<StringRef, unsigned> VariableDefs;
133 
134   Check::CheckType CheckTy;
135 
136   /// Contains the number of line this pattern is in.
137   unsigned LineNumber;
138 
139 public:
140   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
141 
142   /// Returns the location in source code.
143   SMLoc getLoc() const { return PatternLoc; }
144 
145   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
146                     unsigned LineNumber);
147   size_t Match(StringRef Buffer, size_t &MatchLen,
148                StringMap<StringRef> &VariableTable) const;
149   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
150                         const StringMap<StringRef> &VariableTable) const;
151 
152   bool hasVariable() const {
153     return !(VariableUses.empty() && VariableDefs.empty());
154   }
155 
156   Check::CheckType getCheckTy() const { return CheckTy; }
157 
158 private:
159   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
160   void AddBackrefToRegEx(unsigned BackrefNum);
161   unsigned
162   ComputeMatchDistance(StringRef Buffer,
163                        const StringMap<StringRef> &VariableTable) const;
164   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
165   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
166 };
167 
168 /// Parses the given string into the Pattern.
169 ///
170 /// \p Prefix provides which prefix is being matched, \p SM provides the
171 /// SourceMgr used for error reports, and \p LineNumber is the line number in
172 /// the input file from which the pattern string was read. Returns true in
173 /// case of an error, false otherwise.
174 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
175                            SourceMgr &SM, unsigned LineNumber) {
176   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
177 
178   this->LineNumber = LineNumber;
179   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
180 
181   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
182     // Ignore trailing whitespace.
183     while (!PatternStr.empty() &&
184            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
185       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
186 
187   // Check that there is something on the line.
188   if (PatternStr.empty()) {
189     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
190                     "found empty check string with prefix '" + Prefix + ":'");
191     return true;
192   }
193 
194   // Check to see if this is a fixed string, or if it has regex pieces.
195   if (!MatchFullLinesHere &&
196       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
197                                  PatternStr.find("[[") == StringRef::npos))) {
198     FixedStr = PatternStr;
199     return false;
200   }
201 
202   if (MatchFullLinesHere) {
203     RegExStr += '^';
204     if (!NoCanonicalizeWhiteSpace)
205       RegExStr += " *";
206   }
207 
208   // Paren value #0 is for the fully matched string.  Any new parenthesized
209   // values add from there.
210   unsigned CurParen = 1;
211 
212   // Otherwise, there is at least one regex piece.  Build up the regex pattern
213   // by escaping scary characters in fixed strings, building up one big regex.
214   while (!PatternStr.empty()) {
215     // RegEx matches.
216     if (PatternStr.startswith("{{")) {
217       // This is the start of a regex match.  Scan for the }}.
218       size_t End = PatternStr.find("}}");
219       if (End == StringRef::npos) {
220         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
221                         SourceMgr::DK_Error,
222                         "found start of regex string with no end '}}'");
223         return true;
224       }
225 
226       // Enclose {{}} patterns in parens just like [[]] even though we're not
227       // capturing the result for any purpose.  This is required in case the
228       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
229       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
230       RegExStr += '(';
231       ++CurParen;
232 
233       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
234         return true;
235       RegExStr += ')';
236 
237       PatternStr = PatternStr.substr(End + 2);
238       continue;
239     }
240 
241     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
242     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
243     // second form is [[foo]] which is a reference to foo.  The variable name
244     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
245     // it.  This is to catch some common errors.
246     if (PatternStr.startswith("[[")) {
247       // Find the closing bracket pair ending the match.  End is going to be an
248       // offset relative to the beginning of the match string.
249       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
250 
251       if (End == StringRef::npos) {
252         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
253                         SourceMgr::DK_Error,
254                         "invalid named regex reference, no ]] found");
255         return true;
256       }
257 
258       StringRef MatchStr = PatternStr.substr(2, End);
259       PatternStr = PatternStr.substr(End + 4);
260 
261       // Get the regex name (e.g. "foo").
262       size_t NameEnd = MatchStr.find(':');
263       StringRef Name = MatchStr.substr(0, NameEnd);
264 
265       if (Name.empty()) {
266         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
267                         "invalid name in named regex: empty name");
268         return true;
269       }
270 
271       // Verify that the name/expression is well formed. FileCheck currently
272       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
273       // is relaxed, more strict check is performed in \c EvaluateExpression.
274       bool IsExpression = false;
275       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
276         if (i == 0) {
277           if (Name[i] == '$')  // Global vars start with '$'
278             continue;
279           if (Name[i] == '@') {
280             if (NameEnd != StringRef::npos) {
281               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
282                               SourceMgr::DK_Error,
283                               "invalid name in named regex definition");
284               return true;
285             }
286             IsExpression = true;
287             continue;
288           }
289         }
290         if (Name[i] != '_' && !isalnum(Name[i]) &&
291             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
292           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
293                           SourceMgr::DK_Error, "invalid name in named regex");
294           return true;
295         }
296       }
297 
298       // Name can't start with a digit.
299       if (isdigit(static_cast<unsigned char>(Name[0]))) {
300         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
301                         "invalid name in named regex");
302         return true;
303       }
304 
305       // Handle [[foo]].
306       if (NameEnd == StringRef::npos) {
307         // Handle variables that were defined earlier on the same line by
308         // emitting a backreference.
309         if (VariableDefs.find(Name) != VariableDefs.end()) {
310           unsigned VarParenNum = VariableDefs[Name];
311           if (VarParenNum < 1 || VarParenNum > 9) {
312             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
313                             SourceMgr::DK_Error,
314                             "Can't back-reference more than 9 variables");
315             return true;
316           }
317           AddBackrefToRegEx(VarParenNum);
318         } else {
319           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
320         }
321         continue;
322       }
323 
324       // Handle [[foo:.*]].
325       VariableDefs[Name] = CurParen;
326       RegExStr += '(';
327       ++CurParen;
328 
329       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
330         return true;
331 
332       RegExStr += ')';
333     }
334 
335     // Handle fixed string matches.
336     // Find the end, which is the start of the next regex.
337     size_t FixedMatchEnd = PatternStr.find("{{");
338     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
339     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
340     PatternStr = PatternStr.substr(FixedMatchEnd);
341   }
342 
343   if (MatchFullLinesHere) {
344     if (!NoCanonicalizeWhiteSpace)
345       RegExStr += " *";
346     RegExStr += '$';
347   }
348 
349   return false;
350 }
351 
352 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
353   Regex R(RS);
354   std::string Error;
355   if (!R.isValid(Error)) {
356     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
357                     "invalid regex: " + Error);
358     return true;
359   }
360 
361   RegExStr += RS.str();
362   CurParen += R.getNumMatches();
363   return false;
364 }
365 
366 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
367   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
368   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
369   RegExStr += Backref;
370 }
371 
372 /// Evaluates expression and stores the result to \p Value.
373 ///
374 /// Returns true on success and false when the expression has invalid syntax.
375 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
376   // The only supported expression is @LINE([\+-]\d+)?
377   if (!Expr.startswith("@LINE"))
378     return false;
379   Expr = Expr.substr(StringRef("@LINE").size());
380   int Offset = 0;
381   if (!Expr.empty()) {
382     if (Expr[0] == '+')
383       Expr = Expr.substr(1);
384     else if (Expr[0] != '-')
385       return false;
386     if (Expr.getAsInteger(10, Offset))
387       return false;
388   }
389   Value = llvm::itostr(LineNumber + Offset);
390   return true;
391 }
392 
393 /// Matches the pattern string against the input buffer \p Buffer
394 ///
395 /// This returns the position that is matched or npos if there is no match. If
396 /// there is a match, the size of the matched string is returned in \p
397 /// MatchLen.
398 ///
399 /// The \p VariableTable StringMap provides the current values of filecheck
400 /// variables and is updated if this match defines new values.
401 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
402                       StringMap<StringRef> &VariableTable) const {
403   // If this is the EOF pattern, match it immediately.
404   if (CheckTy == Check::CheckEOF) {
405     MatchLen = 0;
406     return Buffer.size();
407   }
408 
409   // If this is a fixed string pattern, just match it now.
410   if (!FixedStr.empty()) {
411     MatchLen = FixedStr.size();
412     return Buffer.find(FixedStr);
413   }
414 
415   // Regex match.
416 
417   // If there are variable uses, we need to create a temporary string with the
418   // actual value.
419   StringRef RegExToMatch = RegExStr;
420   std::string TmpStr;
421   if (!VariableUses.empty()) {
422     TmpStr = RegExStr;
423 
424     unsigned InsertOffset = 0;
425     for (const auto &VariableUse : VariableUses) {
426       std::string Value;
427 
428       if (VariableUse.first[0] == '@') {
429         if (!EvaluateExpression(VariableUse.first, Value))
430           return StringRef::npos;
431       } else {
432         StringMap<StringRef>::iterator it =
433             VariableTable.find(VariableUse.first);
434         // If the variable is undefined, return an error.
435         if (it == VariableTable.end())
436           return StringRef::npos;
437 
438         // Look up the value and escape it so that we can put it into the regex.
439         Value += Regex::escape(it->second);
440       }
441 
442       // Plop it into the regex at the adjusted offset.
443       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
444                     Value.begin(), Value.end());
445       InsertOffset += Value.size();
446     }
447 
448     // Match the newly constructed regex.
449     RegExToMatch = TmpStr;
450   }
451 
452   SmallVector<StringRef, 4> MatchInfo;
453   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
454     return StringRef::npos;
455 
456   // Successful regex match.
457   assert(!MatchInfo.empty() && "Didn't get any match");
458   StringRef FullMatch = MatchInfo[0];
459 
460   // If this defines any variables, remember their values.
461   for (const auto &VariableDef : VariableDefs) {
462     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
463     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
464   }
465 
466   MatchLen = FullMatch.size();
467   return FullMatch.data() - Buffer.data();
468 }
469 
470 
471 /// Computes an arbitrary estimate for the quality of matching this pattern at
472 /// the start of \p Buffer; a distance of zero should correspond to a perfect
473 /// match.
474 unsigned
475 Pattern::ComputeMatchDistance(StringRef Buffer,
476                               const StringMap<StringRef> &VariableTable) const {
477   // Just compute the number of matching characters. For regular expressions, we
478   // just compare against the regex itself and hope for the best.
479   //
480   // FIXME: One easy improvement here is have the regex lib generate a single
481   // example regular expression which matches, and use that as the example
482   // string.
483   StringRef ExampleString(FixedStr);
484   if (ExampleString.empty())
485     ExampleString = RegExStr;
486 
487   // Only compare up to the first line in the buffer, or the string size.
488   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
489   BufferPrefix = BufferPrefix.split('\n').first;
490   return BufferPrefix.edit_distance(ExampleString);
491 }
492 
493 /// Prints additional information about a failure to match involving this
494 /// pattern.
495 void Pattern::PrintFailureInfo(
496     const SourceMgr &SM, StringRef Buffer,
497     const StringMap<StringRef> &VariableTable) const {
498   // If this was a regular expression using variables, print the current
499   // variable values.
500   if (!VariableUses.empty()) {
501     for (const auto &VariableUse : VariableUses) {
502       SmallString<256> Msg;
503       raw_svector_ostream OS(Msg);
504       StringRef Var = VariableUse.first;
505       if (Var[0] == '@') {
506         std::string Value;
507         if (EvaluateExpression(Var, Value)) {
508           OS << "with expression \"";
509           OS.write_escaped(Var) << "\" equal to \"";
510           OS.write_escaped(Value) << "\"";
511         } else {
512           OS << "uses incorrect expression \"";
513           OS.write_escaped(Var) << "\"";
514         }
515       } else {
516         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
517 
518         // Check for undefined variable references.
519         if (it == VariableTable.end()) {
520           OS << "uses undefined variable \"";
521           OS.write_escaped(Var) << "\"";
522         } else {
523           OS << "with variable \"";
524           OS.write_escaped(Var) << "\" equal to \"";
525           OS.write_escaped(it->second) << "\"";
526         }
527       }
528 
529       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
530                       OS.str());
531     }
532   }
533 
534   // Attempt to find the closest/best fuzzy match.  Usually an error happens
535   // because some string in the output didn't exactly match. In these cases, we
536   // would like to show the user a best guess at what "should have" matched, to
537   // save them having to actually check the input manually.
538   size_t NumLinesForward = 0;
539   size_t Best = StringRef::npos;
540   double BestQuality = 0;
541 
542   // Use an arbitrary 4k limit on how far we will search.
543   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
544     if (Buffer[i] == '\n')
545       ++NumLinesForward;
546 
547     // Patterns have leading whitespace stripped, so skip whitespace when
548     // looking for something which looks like a pattern.
549     if (Buffer[i] == ' ' || Buffer[i] == '\t')
550       continue;
551 
552     // Compute the "quality" of this match as an arbitrary combination of the
553     // match distance and the number of lines skipped to get to this match.
554     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
555     double Quality = Distance + (NumLinesForward / 100.);
556 
557     if (Quality < BestQuality || Best == StringRef::npos) {
558       Best = i;
559       BestQuality = Quality;
560     }
561   }
562 
563   // Print the "possible intended match here" line if we found something
564   // reasonable and not equal to what we showed in the "scanning from here"
565   // line.
566   if (Best && Best != StringRef::npos && BestQuality < 50) {
567     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
568                     SourceMgr::DK_Note, "possible intended match here");
569 
570     // FIXME: If we wanted to be really friendly we would show why the match
571     // failed, as it can be hard to spot simple one character differences.
572   }
573 }
574 
575 /// Finds the closing sequence of a regex variable usage or definition.
576 ///
577 /// \p Str has to point in the beginning of the definition (right after the
578 /// opening sequence). Returns the offset of the closing sequence within Str,
579 /// or npos if it was not found.
580 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
581   // Offset keeps track of the current offset within the input Str
582   size_t Offset = 0;
583   // [...] Nesting depth
584   size_t BracketDepth = 0;
585 
586   while (!Str.empty()) {
587     if (Str.startswith("]]") && BracketDepth == 0)
588       return Offset;
589     if (Str[0] == '\\') {
590       // Backslash escapes the next char within regexes, so skip them both.
591       Str = Str.substr(2);
592       Offset += 2;
593     } else {
594       switch (Str[0]) {
595       default:
596         break;
597       case '[':
598         BracketDepth++;
599         break;
600       case ']':
601         if (BracketDepth == 0) {
602           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
603                           SourceMgr::DK_Error,
604                           "missing closing \"]\" for regex variable");
605           exit(1);
606         }
607         BracketDepth--;
608         break;
609       }
610       Str = Str.substr(1);
611       Offset++;
612     }
613   }
614 
615   return StringRef::npos;
616 }
617 
618 //===----------------------------------------------------------------------===//
619 // Check Strings.
620 //===----------------------------------------------------------------------===//
621 
622 /// A check that we found in the input file.
623 struct CheckString {
624   /// The pattern to match.
625   Pattern Pat;
626 
627   /// Which prefix name this check matched.
628   StringRef Prefix;
629 
630   /// The location in the match file that the check string was specified.
631   SMLoc Loc;
632 
633   /// All of the strings that are disallowed from occurring between this match
634   /// string and the previous one (or start of file).
635   std::vector<Pattern> DagNotStrings;
636 
637   CheckString(const Pattern &P, StringRef S, SMLoc L)
638       : Pat(P), Prefix(S), Loc(L) {}
639 
640   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
641                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
642 
643   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
644   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
645   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
646                 const std::vector<const Pattern *> &NotStrings,
647                 StringMap<StringRef> &VariableTable) const;
648   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
649                   std::vector<const Pattern *> &NotStrings,
650                   StringMap<StringRef> &VariableTable) const;
651 };
652 
653 /// Canonicalize whitespaces in the file. Line endings are replaced with
654 /// UNIX-style '\n'.
655 static StringRef CanonicalizeFile(MemoryBuffer &MB,
656                                   SmallVectorImpl<char> &OutputBuffer) {
657   OutputBuffer.reserve(MB.getBufferSize());
658 
659   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
660        Ptr != End; ++Ptr) {
661     // Eliminate trailing dosish \r.
662     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
663       continue;
664     }
665 
666     // If current char is not a horizontal whitespace or if horizontal
667     // whitespace canonicalization is disabled, dump it to output as is.
668     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
669       OutputBuffer.push_back(*Ptr);
670       continue;
671     }
672 
673     // Otherwise, add one space and advance over neighboring space.
674     OutputBuffer.push_back(' ');
675     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
676       ++Ptr;
677   }
678 
679   // Add a null byte and then return all but that byte.
680   OutputBuffer.push_back('\0');
681   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
682 }
683 
/// Returns true if \p c can be part of a check-prefix-like word, i.e. if it is
/// alphanumeric, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> functions are only defined for EOF and values representable
  // as unsigned char, so a possibly negative plain char must be converted.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
687 
688 // Get the size of the prefix extension.
689 static size_t CheckTypeSize(Check::CheckType Ty) {
690   switch (Ty) {
691   case Check::CheckNone:
692   case Check::CheckBadNot:
693     return 0;
694 
695   case Check::CheckPlain:
696     return sizeof(":") - 1;
697 
698   case Check::CheckNext:
699     return sizeof("-NEXT:") - 1;
700 
701   case Check::CheckSame:
702     return sizeof("-SAME:") - 1;
703 
704   case Check::CheckNot:
705     return sizeof("-NOT:") - 1;
706 
707   case Check::CheckDAG:
708     return sizeof("-DAG:") - 1;
709 
710   case Check::CheckLabel:
711     return sizeof("-LABEL:") - 1;
712 
713   case Check::CheckEOF:
714     llvm_unreachable("Should not be using EOF size");
715   }
716 
717   llvm_unreachable("Bad check type");
718 }
719 
720 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
721   char NextChar = Buffer[Prefix.size()];
722 
723   // Verify that the : is present after the prefix.
724   if (NextChar == ':')
725     return Check::CheckPlain;
726 
727   if (NextChar != '-')
728     return Check::CheckNone;
729 
730   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
731   if (Rest.startswith("NEXT:"))
732     return Check::CheckNext;
733 
734   if (Rest.startswith("SAME:"))
735     return Check::CheckSame;
736 
737   if (Rest.startswith("NOT:"))
738     return Check::CheckNot;
739 
740   if (Rest.startswith("DAG:"))
741     return Check::CheckDAG;
742 
743   if (Rest.startswith("LABEL:"))
744     return Check::CheckLabel;
745 
746   // You can't combine -NOT with another suffix.
747   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
748       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
749       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
750     return Check::CheckBadNot;
751 
752   return Check::CheckNone;
753 }
754 
755 // From the given position, find the next character after the word.
756 static size_t SkipWord(StringRef Str, size_t Loc) {
757   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
758     ++Loc;
759   return Loc;
760 }
761 
762 /// Search the buffer for the first prefix in the prefix regular expression.
763 ///
764 /// This searches the buffer using the provided regular expression, however it
765 /// enforces constraints beyond that:
766 /// 1) The found prefix must not be a suffix of something that looks like
767 ///    a valid prefix.
768 /// 2) The found prefix must be followed by a valid check type suffix using \c
769 ///    FindCheckType above.
770 ///
771 /// The first match of the regular expression to satisfy these two is returned,
772 /// otherwise an empty StringRef is returned to indicate failure.
773 ///
774 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
775 /// start at the beginning of the returned prefix, increment \p LineNumber for
776 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
777 /// check found by examining the suffix.
778 ///
779 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
780 /// is unspecified.
781 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
782                                          unsigned &LineNumber,
783                                          Check::CheckType &CheckTy) {
784   SmallVector<StringRef, 2> Matches;
785 
786   while (!Buffer.empty()) {
787     // Find the first (longest) match using the RE.
788     if (!PrefixRE.match(Buffer, &Matches))
789       // No match at all, bail.
790       return StringRef();
791 
792     StringRef Prefix = Matches[0];
793     Matches.clear();
794 
795     assert(Prefix.data() >= Buffer.data() &&
796            Prefix.data() < Buffer.data() + Buffer.size() &&
797            "Prefix doesn't start inside of buffer!");
798     size_t Loc = Prefix.data() - Buffer.data();
799     StringRef Skipped = Buffer.substr(0, Loc);
800     Buffer = Buffer.drop_front(Loc);
801     LineNumber += Skipped.count('\n');
802 
803     // Check that the matched prefix isn't a suffix of some other check-like
804     // word.
805     // FIXME: This is a very ad-hoc check. it would be better handled in some
806     // other way. Among other things it seems hard to distinguish between
807     // intentional and unintentional uses of this feature.
808     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
809       // Now extract the type.
810       CheckTy = FindCheckType(Buffer, Prefix);
811 
812       // If we've found a valid check type for this prefix, we're done.
813       if (CheckTy != Check::CheckNone)
814         return Prefix;
815     }
816 
817     // If we didn't successfully find a prefix, we need to skip this invalid
818     // prefix and continue scanning. We directly skip the prefix that was
819     // matched and any additional parts of that check-like word.
820     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
821   }
822 
823   // We ran out of buffer while skipping partial matches so give up.
824   return StringRef();
825 }
826 
827 /// Read the check file, which specifies the sequence of expected strings.
828 ///
829 /// The strings are added to the CheckStrings vector. Returns true in case of
830 /// an error, false otherwise.
831 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
832                           std::vector<CheckString> &CheckStrings) {
833   std::vector<Pattern> ImplicitNegativeChecks;
834   for (const auto &PatternString : ImplicitCheckNot) {
835     // Create a buffer with fake command line content in order to display the
836     // command line option responsible for the specific implicit CHECK-NOT.
837     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
838     std::string Suffix = "'";
839     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
840         Prefix + PatternString + Suffix, "command line");
841 
842     StringRef PatternInBuffer =
843         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
844     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
845 
846     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
847     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
848                                                "IMPLICIT-CHECK", SM, 0);
849   }
850 
851   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
852 
853   // LineNumber keeps track of the line on which CheckPrefix instances are
854   // found.
855   unsigned LineNumber = 1;
856 
857   while (1) {
858     Check::CheckType CheckTy;
859 
860     // See if a prefix occurs in the memory buffer.
861     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
862                                                    CheckTy);
863     if (UsedPrefix.empty())
864       break;
865     assert(UsedPrefix.data() == Buffer.data() &&
866            "Failed to move Buffer's start forward, or pointed prefix outside "
867            "of the buffer!");
868 
869     // Location to use for error messages.
870     const char *UsedPrefixStart = UsedPrefix.data();
871 
872     // Skip the buffer to the end.
873     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
874 
875     // Complain about useful-looking but unsupported suffixes.
876     if (CheckTy == Check::CheckBadNot) {
877       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
878                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
879       return true;
880     }
881 
882     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
883     // leading whitespace.
884     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
885       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
886 
887     // Scan ahead to the end of line.
888     size_t EOL = Buffer.find_first_of("\n\r");
889 
890     // Remember the location of the start of the pattern, for diagnostics.
891     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
892 
893     // Parse the pattern.
894     Pattern P(CheckTy);
895     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
896       return true;
897 
898     // Verify that CHECK-LABEL lines do not define or use variables
899     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
900       SM.PrintMessage(
901           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
902           "found '" + UsedPrefix + "-LABEL:'"
903                                    " with variable definition or use");
904       return true;
905     }
906 
907     Buffer = Buffer.substr(EOL);
908 
909     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
910     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
911         CheckStrings.empty()) {
912       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
913       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
914                       SourceMgr::DK_Error,
915                       "found '" + UsedPrefix + "-" + Type +
916                           "' without previous '" + UsedPrefix + ": line");
917       return true;
918     }
919 
920     // Handle CHECK-DAG/-NOT.
921     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
922       DagNotMatches.push_back(P);
923       continue;
924     }
925 
926     // Okay, add the string we captured to the output vector and move on.
927     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
928     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
929     DagNotMatches = ImplicitNegativeChecks;
930   }
931 
932   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
933   // prefix as a filler for the error message.
934   if (!DagNotMatches.empty()) {
935     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
936                               SMLoc::getFromPointer(Buffer.data()));
937     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
938   }
939 
940   if (CheckStrings.empty()) {
941     errs() << "error: no check strings found with prefix"
942            << (CheckPrefixes.size() > 1 ? "es " : " ");
943     prefix_iterator I = CheckPrefixes.begin();
944     prefix_iterator E = CheckPrefixes.end();
945     if (I != E) {
946       errs() << "\'" << *I << ":'";
947       ++I;
948     }
949     for (; I != E; ++I)
950       errs() << ", \'" << *I << ":'";
951 
952     errs() << '\n';
953     return true;
954   }
955 
956   return false;
957 }
958 
959 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
960                              StringRef Buffer,
961                              StringMap<StringRef> &VariableTable) {
962   // Otherwise, we have an error, emit an error message.
963   SM.PrintMessage(Loc, SourceMgr::DK_Error,
964                   "expected string not found in input");
965 
966   // Print the "scanning from here" line.  If the current position is at the
967   // end of a line, advance to the start of the next line.
968   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
969 
970   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
971                   "scanning from here");
972 
973   // Allow the pattern to print additional information if desired.
974   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
975 }
976 
977 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
978                              StringRef Buffer,
979                              StringMap<StringRef> &VariableTable) {
980   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
981 }
982 
983 /// Count the number of newlines in the specified range.
984 static unsigned CountNumNewlinesBetween(StringRef Range,
985                                         const char *&FirstNewLine) {
986   unsigned NumNewLines = 0;
987   while (1) {
988     // Scan for newline.
989     Range = Range.substr(Range.find_first_of("\n\r"));
990     if (Range.empty())
991       return NumNewLines;
992 
993     ++NumNewLines;
994 
995     // Handle \n\r and \r\n as a single newline.
996     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
997         (Range[0] != Range[1]))
998       Range = Range.substr(1);
999     Range = Range.substr(1);
1000 
1001     if (NumNewLines == 1)
1002       FirstNewLine = Range.begin();
1003   }
1004 }
1005 
1006 /// Match check string and its "not strings" and/or "dag strings".
1007 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1008                           bool IsLabelScanMode, size_t &MatchLen,
1009                           StringMap<StringRef> &VariableTable) const {
1010   size_t LastPos = 0;
1011   std::vector<const Pattern *> NotStrings;
1012 
1013   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1014   // bounds; we have not processed variable definitions within the bounded block
1015   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1016   // over the block again (including the last CHECK-LABEL) in normal mode.
1017   if (!IsLabelScanMode) {
1018     // Match "dag strings" (with mixed "not strings" if any).
1019     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1020     if (LastPos == StringRef::npos)
1021       return StringRef::npos;
1022   }
1023 
1024   // Match itself from the last position after matching CHECK-DAG.
1025   StringRef MatchBuffer = Buffer.substr(LastPos);
1026   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1027   if (MatchPos == StringRef::npos) {
1028     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1029     return StringRef::npos;
1030   }
1031 
1032   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1033   // or CHECK-NOT
1034   if (!IsLabelScanMode) {
1035     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1036 
1037     // If this check is a "CHECK-NEXT", verify that the previous match was on
1038     // the previous line (i.e. that there is one newline between them).
1039     if (CheckNext(SM, SkippedRegion))
1040       return StringRef::npos;
1041 
1042     // If this check is a "CHECK-SAME", verify that the previous match was on
1043     // the same line (i.e. that there is no newline between them).
1044     if (CheckSame(SM, SkippedRegion))
1045       return StringRef::npos;
1046 
1047     // If this match had "not strings", verify that they don't exist in the
1048     // skipped region.
1049     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1050       return StringRef::npos;
1051   }
1052 
1053   return LastPos + MatchPos;
1054 }
1055 
1056 /// Verify there is a single line in the given buffer.
1057 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1058   if (Pat.getCheckTy() != Check::CheckNext)
1059     return false;
1060 
1061   // Count the number of newlines between the previous match and this one.
1062   assert(Buffer.data() !=
1063              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1064                                     SMLoc::getFromPointer(Buffer.data())))
1065                  ->getBufferStart() &&
1066          "CHECK-NEXT can't be the first check in a file");
1067 
1068   const char *FirstNewLine = nullptr;
1069   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1070 
1071   if (NumNewLines == 0) {
1072     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1073                     Prefix + "-NEXT: is on the same line as previous match");
1074     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1075                     "'next' match was here");
1076     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1077                     "previous match ended here");
1078     return true;
1079   }
1080 
1081   if (NumNewLines != 1) {
1082     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1083                     Prefix +
1084                         "-NEXT: is not on the line after the previous match");
1085     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1086                     "'next' match was here");
1087     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1088                     "previous match ended here");
1089     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1090                     "non-matching line after previous match is here");
1091     return true;
1092   }
1093 
1094   return false;
1095 }
1096 
1097 /// Verify there is no newline in the given buffer.
1098 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1099   if (Pat.getCheckTy() != Check::CheckSame)
1100     return false;
1101 
1102   // Count the number of newlines between the previous match and this one.
1103   assert(Buffer.data() !=
1104              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1105                                     SMLoc::getFromPointer(Buffer.data())))
1106                  ->getBufferStart() &&
1107          "CHECK-SAME can't be the first check in a file");
1108 
1109   const char *FirstNewLine = nullptr;
1110   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1111 
1112   if (NumNewLines != 0) {
1113     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1114                     Prefix +
1115                         "-SAME: is not on the same line as the previous match");
1116     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1117                     "'next' match was here");
1118     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1119                     "previous match ended here");
1120     return true;
1121   }
1122 
1123   return false;
1124 }
1125 
1126 /// Verify there's no "not strings" in the given buffer.
1127 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1128                            const std::vector<const Pattern *> &NotStrings,
1129                            StringMap<StringRef> &VariableTable) const {
1130   for (const Pattern *Pat : NotStrings) {
1131     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1132 
1133     size_t MatchLen = 0;
1134     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1135 
1136     if (Pos == StringRef::npos)
1137       continue;
1138 
1139     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1140                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1141     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1142                     Prefix + "-NOT: pattern specified here");
1143     return true;
1144   }
1145 
1146   return false;
1147 }
1148 
1149 /// Match "dag strings" and their mixed "not strings".
1150 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1151                              std::vector<const Pattern *> &NotStrings,
1152                              StringMap<StringRef> &VariableTable) const {
1153   if (DagNotStrings.empty())
1154     return 0;
1155 
1156   size_t LastPos = 0;
1157   size_t StartPos = LastPos;
1158 
1159   for (const Pattern &Pat : DagNotStrings) {
1160     assert((Pat.getCheckTy() == Check::CheckDAG ||
1161             Pat.getCheckTy() == Check::CheckNot) &&
1162            "Invalid CHECK-DAG or CHECK-NOT!");
1163 
1164     if (Pat.getCheckTy() == Check::CheckNot) {
1165       NotStrings.push_back(&Pat);
1166       continue;
1167     }
1168 
1169     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1170 
1171     size_t MatchLen = 0, MatchPos;
1172 
1173     // CHECK-DAG always matches from the start.
1174     StringRef MatchBuffer = Buffer.substr(StartPos);
1175     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1176     // With a group of CHECK-DAGs, a single mismatching means the match on
1177     // that group of CHECK-DAGs fails immediately.
1178     if (MatchPos == StringRef::npos) {
1179       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1180       return StringRef::npos;
1181     }
1182     // Re-calc it as the offset relative to the start of the original string.
1183     MatchPos += StartPos;
1184 
1185     if (!NotStrings.empty()) {
1186       if (MatchPos < LastPos) {
1187         // Reordered?
1188         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1189                         SourceMgr::DK_Error,
1190                         Prefix + "-DAG: found a match of CHECK-DAG"
1191                                  " reordering across a CHECK-NOT");
1192         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1193                         SourceMgr::DK_Note,
1194                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1195                                  " is found here");
1196         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1197                         Prefix + "-NOT: the crossed pattern specified"
1198                                  " here");
1199         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1200                         Prefix + "-DAG: the reordered pattern specified"
1201                                  " here");
1202         return StringRef::npos;
1203       }
1204       // All subsequent CHECK-DAGs should be matched from the farthest
1205       // position of all precedent CHECK-DAGs (including this one.)
1206       StartPos = LastPos;
1207       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1208       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1209       // region.
1210       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1211       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1212         return StringRef::npos;
1213       // Clear "not strings".
1214       NotStrings.clear();
1215     }
1216 
1217     // Update the last position with CHECK-DAG matches.
1218     LastPos = std::max(MatchPos + MatchLen, LastPos);
1219   }
1220 
1221   return LastPos;
1222 }
1223 
1224 // A check prefix must contain only alphanumeric, hyphens and underscores.
1225 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1226   Regex Validator("^[a-zA-Z0-9_-]*$");
1227   return Validator.match(CheckPrefix);
1228 }
1229 
1230 static bool ValidateCheckPrefixes() {
1231   StringSet<> PrefixSet;
1232 
1233   for (StringRef Prefix : CheckPrefixes) {
1234     // Reject empty prefixes.
1235     if (Prefix == "")
1236       return false;
1237 
1238     if (!PrefixSet.insert(Prefix).second)
1239       return false;
1240 
1241     if (!ValidateCheckPrefix(Prefix))
1242       return false;
1243   }
1244 
1245   return true;
1246 }
1247 
1248 // Combines the check prefixes into a single regex so that we can efficiently
1249 // scan for any of the set.
1250 //
1251 // The semantics are that the longest-match wins which matches our regex
1252 // library.
1253 static Regex buildCheckPrefixRegex() {
1254   // I don't think there's a way to specify an initial value for cl::list,
1255   // so if nothing was specified, add the default
1256   if (CheckPrefixes.empty())
1257     CheckPrefixes.push_back("CHECK");
1258 
1259   // We already validated the contents of CheckPrefixes so just concatenate
1260   // them as alternatives.
1261   SmallString<32> PrefixRegexStr;
1262   for (StringRef Prefix : CheckPrefixes) {
1263     if (Prefix != CheckPrefixes.front())
1264       PrefixRegexStr.push_back('|');
1265 
1266     PrefixRegexStr.append(Prefix);
1267   }
1268 
1269   return Regex(PrefixRegexStr);
1270 }
1271 
1272 static void DumpCommandLine(int argc, char **argv) {
1273   errs() << "FileCheck command line: ";
1274   for (int I = 0; I < argc; I++)
1275     errs() << " " << argv[I];
1276   errs() << "\n";
1277 }
1278 
1279 // Remove local variables from \p VariableTable. Global variables
1280 // (start with '$') are preserved.
1281 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1282   SmallVector<StringRef, 16> LocalVars;
1283   for (const auto &Var : VariableTable)
1284     if (Var.first()[0] != '$')
1285       LocalVars.push_back(Var.first());
1286 
1287   for (const auto &Var : LocalVars)
1288     VariableTable.erase(Var);
1289 }
1290 
1291 /// Check the input to FileCheck provided in the \p Buffer against the \p
1292 /// CheckStrings read from the check file.
1293 ///
1294 /// Returns false if the input fails to satisfy the checks.
1295 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1296                 ArrayRef<CheckString> CheckStrings) {
1297   bool ChecksFailed = false;
1298 
1299   /// VariableTable - This holds all the current filecheck variables.
1300   StringMap<StringRef> VariableTable;
1301 
1302   for (const auto& Def : GlobalDefines)
1303     VariableTable.insert(StringRef(Def).split('='));
1304 
1305   unsigned i = 0, j = 0, e = CheckStrings.size();
1306   while (true) {
1307     StringRef CheckRegion;
1308     if (j == e) {
1309       CheckRegion = Buffer;
1310     } else {
1311       const CheckString &CheckLabelStr = CheckStrings[j];
1312       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1313         ++j;
1314         continue;
1315       }
1316 
1317       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1318       size_t MatchLabelLen = 0;
1319       size_t MatchLabelPos =
1320           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1321       if (MatchLabelPos == StringRef::npos)
1322         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1323         return false;
1324 
1325       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1326       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1327       ++j;
1328     }
1329 
1330     if (EnableVarScope)
1331       ClearLocalVars(VariableTable);
1332 
1333     for (; i != j; ++i) {
1334       const CheckString &CheckStr = CheckStrings[i];
1335 
1336       // Check each string within the scanned region, including a second check
1337       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1338       size_t MatchLen = 0;
1339       size_t MatchPos =
1340           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1341 
1342       if (MatchPos == StringRef::npos) {
1343         ChecksFailed = true;
1344         i = j;
1345         break;
1346       }
1347 
1348       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1349     }
1350 
1351     if (j == e)
1352       break;
1353   }
1354 
1355   // Success if no checks failed.
1356   return !ChecksFailed;
1357 }
1358 
1359 int main(int argc, char **argv) {
1360   sys::PrintStackTraceOnErrorSignal(argv[0]);
1361   PrettyStackTraceProgram X(argc, argv);
1362   cl::ParseCommandLineOptions(argc, argv);
1363 
1364   if (!ValidateCheckPrefixes()) {
1365     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1366               "start with a letter and contain only alphanumeric characters, "
1367               "hyphens and underscores\n";
1368     return 2;
1369   }
1370 
1371   Regex PrefixRE = buildCheckPrefixRegex();
1372   std::string REError;
1373   if (!PrefixRE.isValid(REError)) {
1374     errs() << "Unable to combine check-prefix strings into a prefix regular "
1375               "expression! This is likely a bug in FileCheck's verification of "
1376               "the check-prefix strings. Regular expression parsing failed "
1377               "with the following error: "
1378            << REError << "\n";
1379     return 2;
1380   }
1381 
1382   SourceMgr SM;
1383 
1384   // Read the expected strings from the check file.
1385   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1386       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1387   if (std::error_code EC = CheckFileOrErr.getError()) {
1388     errs() << "Could not open check file '" << CheckFilename
1389            << "': " << EC.message() << '\n';
1390     return 2;
1391   }
1392   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1393 
1394   SmallString<4096> CheckFileBuffer;
1395   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1396 
1397   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1398                             CheckFileText, CheckFile.getBufferIdentifier()),
1399                         SMLoc());
1400 
1401   std::vector<CheckString> CheckStrings;
1402   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1403     return 2;
1404 
1405   // Open the file to check and add it to SourceMgr.
1406   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1407       MemoryBuffer::getFileOrSTDIN(InputFilename);
1408   if (std::error_code EC = InputFileOrErr.getError()) {
1409     errs() << "Could not open input file '" << InputFilename
1410            << "': " << EC.message() << '\n';
1411     return 2;
1412   }
1413   MemoryBuffer &InputFile = *InputFileOrErr.get();
1414 
1415   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1416     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1417     DumpCommandLine(argc, argv);
1418     return 2;
1419   }
1420 
1421   SmallString<4096> InputFileBuffer;
1422   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1423 
1424   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1425                             InputFileText, InputFile.getBufferIdentifier()),
1426                         SMLoc());
1427 
1428   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1429 }
1430