xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 83e63d96a9a949d3f2fcee6f663c4e4923b9da6f)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an exit status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43                   cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string> CheckPrefixes(
46     "check-prefix",
47     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool> NoCanonicalizeWhiteSpace(
55     "strict-whitespace",
56     cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::list<std::string> GlobalDefines("D", cl::Prefix,
66     cl::desc("Define a variable to be used in capture patterns."),
67     cl::value_desc("VAR=VALUE"));
68 
69 static cl::opt<bool> AllowEmptyInput(
70     "allow-empty", cl::init(false),
71     cl::desc("Allow the input file to be empty. This is useful when making\n"
72              "checks that some error message does not occur, for example."));
73 
74 static cl::opt<bool> MatchFullLines(
75     "match-full-lines", cl::init(false),
76     cl::desc("Require all positive matches to cover an entire input line.\n"
77              "Allows leading and trailing whitespace if --strict-whitespace\n"
78              "is not also passed."));
79 
80 static cl::opt<bool> EnableVarScope(
81     "enable-var-scope", cl::init(false),
82     cl::desc("Enables scope for regex variables. Variables with names that\n"
83              "do not start with '$' will be reset at the beginning of\n"
84              "each CHECK-LABEL block."));
85 
86 typedef cl::list<std::string>::const_iterator prefix_iterator;
87 
88 //===----------------------------------------------------------------------===//
89 // Pattern Handling Code.
90 //===----------------------------------------------------------------------===//
91 
namespace Check {
/// The kind of directive found after a check prefix.
enum CheckType {
  /// No valid directive follows the prefix.
  CheckNone = 0,
  /// "PREFIX:".
  CheckPlain = 1,
  /// "PREFIX-NEXT:".
  CheckNext = 2,
  /// "PREFIX-SAME:".
  CheckSame = 3,
  /// "PREFIX-NOT:".
  CheckNot = 4,
  /// "PREFIX-DAG:".
  CheckDAG = 5,
  /// "PREFIX-LABEL:".
  CheckLabel = 6,

  /// Indicates the pattern only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  CheckEOF = 7,

  /// Marks when parsing found a -NOT check combined with another CHECK suffix.
  CheckBadNot = 8
};
} // end namespace Check
110 
111 class Pattern {
112   SMLoc PatternLoc;
113 
114   /// A fixed string to match as the pattern or empty if this pattern requires
115   /// a regex match.
116   StringRef FixedStr;
117 
118   /// A regex string to match as the pattern or empty if this pattern requires
119   /// a fixed string to match.
120   std::string RegExStr;
121 
122   /// Entries in this vector map to uses of a variable in the pattern, e.g.
123   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
124   /// we'll get an entry in this vector that tells us to insert the value of
125   /// bar at offset 3.
126   std::vector<std::pair<StringRef, unsigned>> VariableUses;
127 
128   /// Maps definitions of variables to their parenthesized capture numbers.
129   ///
130   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
131   /// 1.
132   std::map<StringRef, unsigned> VariableDefs;
133 
134   Check::CheckType CheckTy;
135 
136   /// Contains the number of line this pattern is in.
137   unsigned LineNumber;
138 
139 public:
140   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
141 
142   /// Returns the location in source code.
143   SMLoc getLoc() const { return PatternLoc; }
144 
145   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
146                     unsigned LineNumber);
147   size_t Match(StringRef Buffer, size_t &MatchLen,
148                StringMap<StringRef> &VariableTable) const;
149   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
150                         const StringMap<StringRef> &VariableTable) const;
151 
152   bool hasVariable() const {
153     return !(VariableUses.empty() && VariableDefs.empty());
154   }
155 
156   Check::CheckType getCheckTy() const { return CheckTy; }
157 
158 private:
159   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
160   void AddBackrefToRegEx(unsigned BackrefNum);
161   unsigned
162   ComputeMatchDistance(StringRef Buffer,
163                        const StringMap<StringRef> &VariableTable) const;
164   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
165   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
166 };
167 
168 /// Parses the given string into the Pattern.
169 ///
170 /// \p Prefix provides which prefix is being matched, \p SM provides the
171 /// SourceMgr used for error reports, and \p LineNumber is the line number in
172 /// the input file from which the pattern string was read. Returns true in
173 /// case of an error, false otherwise.
174 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
175                            SourceMgr &SM, unsigned LineNumber) {
176   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
177 
178   this->LineNumber = LineNumber;
179   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
180 
181   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
182     // Ignore trailing whitespace.
183     while (!PatternStr.empty() &&
184            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
185       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
186 
187   // Check that there is something on the line.
188   if (PatternStr.empty()) {
189     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
190                     "found empty check string with prefix '" + Prefix + ":'");
191     return true;
192   }
193 
194   // Check to see if this is a fixed string, or if it has regex pieces.
195   if (!MatchFullLinesHere &&
196       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
197                                  PatternStr.find("[[") == StringRef::npos))) {
198     FixedStr = PatternStr;
199     return false;
200   }
201 
202   if (MatchFullLinesHere) {
203     RegExStr += '^';
204     if (!NoCanonicalizeWhiteSpace)
205       RegExStr += " *";
206   }
207 
208   // Paren value #0 is for the fully matched string.  Any new parenthesized
209   // values add from there.
210   unsigned CurParen = 1;
211 
212   // Otherwise, there is at least one regex piece.  Build up the regex pattern
213   // by escaping scary characters in fixed strings, building up one big regex.
214   while (!PatternStr.empty()) {
215     // RegEx matches.
216     if (PatternStr.startswith("{{")) {
217       // This is the start of a regex match.  Scan for the }}.
218       size_t End = PatternStr.find("}}");
219       if (End == StringRef::npos) {
220         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
221                         SourceMgr::DK_Error,
222                         "found start of regex string with no end '}}'");
223         return true;
224       }
225 
226       // Enclose {{}} patterns in parens just like [[]] even though we're not
227       // capturing the result for any purpose.  This is required in case the
228       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
229       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
230       RegExStr += '(';
231       ++CurParen;
232 
233       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
234         return true;
235       RegExStr += ')';
236 
237       PatternStr = PatternStr.substr(End + 2);
238       continue;
239     }
240 
241     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
242     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
243     // second form is [[foo]] which is a reference to foo.  The variable name
244     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
245     // it.  This is to catch some common errors.
246     if (PatternStr.startswith("[[")) {
247       // Find the closing bracket pair ending the match.  End is going to be an
248       // offset relative to the beginning of the match string.
249       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
250 
251       if (End == StringRef::npos) {
252         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
253                         SourceMgr::DK_Error,
254                         "invalid named regex reference, no ]] found");
255         return true;
256       }
257 
258       StringRef MatchStr = PatternStr.substr(2, End);
259       PatternStr = PatternStr.substr(End + 4);
260 
261       // Get the regex name (e.g. "foo").
262       size_t NameEnd = MatchStr.find(':');
263       StringRef Name = MatchStr.substr(0, NameEnd);
264 
265       if (Name.empty()) {
266         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
267                         "invalid name in named regex: empty name");
268         return true;
269       }
270 
271       // Verify that the name/expression is well formed. FileCheck currently
272       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
273       // is relaxed, more strict check is performed in \c EvaluateExpression.
274       bool IsExpression = false;
275       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
276         if (i == 0) {
277           if (Name[i] == '$')  // Global vars start with '$'
278             continue;
279           if (Name[i] == '@') {
280             if (NameEnd != StringRef::npos) {
281               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
282                               SourceMgr::DK_Error,
283                               "invalid name in named regex definition");
284               return true;
285             }
286             IsExpression = true;
287             continue;
288           }
289         }
290         if (Name[i] != '_' && !isalnum(Name[i]) &&
291             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
292           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
293                           SourceMgr::DK_Error, "invalid name in named regex");
294           return true;
295         }
296       }
297 
298       // Name can't start with a digit.
299       if (isdigit(static_cast<unsigned char>(Name[0]))) {
300         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
301                         "invalid name in named regex");
302         return true;
303       }
304 
305       // Handle [[foo]].
306       if (NameEnd == StringRef::npos) {
307         // Handle variables that were defined earlier on the same line by
308         // emitting a backreference.
309         if (VariableDefs.find(Name) != VariableDefs.end()) {
310           unsigned VarParenNum = VariableDefs[Name];
311           if (VarParenNum < 1 || VarParenNum > 9) {
312             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
313                             SourceMgr::DK_Error,
314                             "Can't back-reference more than 9 variables");
315             return true;
316           }
317           AddBackrefToRegEx(VarParenNum);
318         } else {
319           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
320         }
321         continue;
322       }
323 
324       // Handle [[foo:.*]].
325       VariableDefs[Name] = CurParen;
326       RegExStr += '(';
327       ++CurParen;
328 
329       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
330         return true;
331 
332       RegExStr += ')';
333     }
334 
335     // Handle fixed string matches.
336     // Find the end, which is the start of the next regex.
337     size_t FixedMatchEnd = PatternStr.find("{{");
338     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
339     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
340     PatternStr = PatternStr.substr(FixedMatchEnd);
341   }
342 
343   if (MatchFullLinesHere) {
344     if (!NoCanonicalizeWhiteSpace)
345       RegExStr += " *";
346     RegExStr += '$';
347   }
348 
349   return false;
350 }
351 
352 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
353   Regex R(RS);
354   std::string Error;
355   if (!R.isValid(Error)) {
356     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
357                     "invalid regex: " + Error);
358     return true;
359   }
360 
361   RegExStr += RS.str();
362   CurParen += R.getNumMatches();
363   return false;
364 }
365 
366 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
367   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
368   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
369   RegExStr += Backref;
370 }
371 
372 /// Evaluates expression and stores the result to \p Value.
373 ///
374 /// Returns true on success and false when the expression has invalid syntax.
375 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
376   // The only supported expression is @LINE([\+-]\d+)?
377   if (!Expr.startswith("@LINE"))
378     return false;
379   Expr = Expr.substr(StringRef("@LINE").size());
380   int Offset = 0;
381   if (!Expr.empty()) {
382     if (Expr[0] == '+')
383       Expr = Expr.substr(1);
384     else if (Expr[0] != '-')
385       return false;
386     if (Expr.getAsInteger(10, Offset))
387       return false;
388   }
389   Value = llvm::itostr(LineNumber + Offset);
390   return true;
391 }
392 
393 /// Matches the pattern string against the input buffer \p Buffer
394 ///
395 /// This returns the position that is matched or npos if there is no match. If
396 /// there is a match, the size of the matched string is returned in \p
397 /// MatchLen.
398 ///
399 /// The \p VariableTable StringMap provides the current values of filecheck
400 /// variables and is updated if this match defines new values.
401 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
402                       StringMap<StringRef> &VariableTable) const {
403   // If this is the EOF pattern, match it immediately.
404   if (CheckTy == Check::CheckEOF) {
405     MatchLen = 0;
406     return Buffer.size();
407   }
408 
409   // If this is a fixed string pattern, just match it now.
410   if (!FixedStr.empty()) {
411     MatchLen = FixedStr.size();
412     return Buffer.find(FixedStr);
413   }
414 
415   // Regex match.
416 
417   // If there are variable uses, we need to create a temporary string with the
418   // actual value.
419   StringRef RegExToMatch = RegExStr;
420   std::string TmpStr;
421   if (!VariableUses.empty()) {
422     TmpStr = RegExStr;
423 
424     unsigned InsertOffset = 0;
425     for (const auto &VariableUse : VariableUses) {
426       std::string Value;
427 
428       if (VariableUse.first[0] == '@') {
429         if (!EvaluateExpression(VariableUse.first, Value))
430           return StringRef::npos;
431       } else {
432         StringMap<StringRef>::iterator it =
433             VariableTable.find(VariableUse.first);
434         // If the variable is undefined, return an error.
435         if (it == VariableTable.end())
436           return StringRef::npos;
437 
438         // Look up the value and escape it so that we can put it into the regex.
439         Value += Regex::escape(it->second);
440       }
441 
442       // Plop it into the regex at the adjusted offset.
443       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
444                     Value.begin(), Value.end());
445       InsertOffset += Value.size();
446     }
447 
448     // Match the newly constructed regex.
449     RegExToMatch = TmpStr;
450   }
451 
452   SmallVector<StringRef, 4> MatchInfo;
453   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
454     return StringRef::npos;
455 
456   // Successful regex match.
457   assert(!MatchInfo.empty() && "Didn't get any match");
458   StringRef FullMatch = MatchInfo[0];
459 
460   // If this defines any variables, remember their values.
461   for (const auto &VariableDef : VariableDefs) {
462     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
463     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
464   }
465 
466   MatchLen = FullMatch.size();
467   return FullMatch.data() - Buffer.data();
468 }
469 
470 
471 /// Computes an arbitrary estimate for the quality of matching this pattern at
472 /// the start of \p Buffer; a distance of zero should correspond to a perfect
473 /// match.
474 unsigned
475 Pattern::ComputeMatchDistance(StringRef Buffer,
476                               const StringMap<StringRef> &VariableTable) const {
477   // Just compute the number of matching characters. For regular expressions, we
478   // just compare against the regex itself and hope for the best.
479   //
480   // FIXME: One easy improvement here is have the regex lib generate a single
481   // example regular expression which matches, and use that as the example
482   // string.
483   StringRef ExampleString(FixedStr);
484   if (ExampleString.empty())
485     ExampleString = RegExStr;
486 
487   // Only compare up to the first line in the buffer, or the string size.
488   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
489   BufferPrefix = BufferPrefix.split('\n').first;
490   return BufferPrefix.edit_distance(ExampleString);
491 }
492 
493 /// Prints additional information about a failure to match involving this
494 /// pattern.
495 void Pattern::PrintFailureInfo(
496     const SourceMgr &SM, StringRef Buffer,
497     const StringMap<StringRef> &VariableTable) const {
498   // If this was a regular expression using variables, print the current
499   // variable values.
500   if (!VariableUses.empty()) {
501     for (const auto &VariableUse : VariableUses) {
502       SmallString<256> Msg;
503       raw_svector_ostream OS(Msg);
504       StringRef Var = VariableUse.first;
505       if (Var[0] == '@') {
506         std::string Value;
507         if (EvaluateExpression(Var, Value)) {
508           OS << "with expression \"";
509           OS.write_escaped(Var) << "\" equal to \"";
510           OS.write_escaped(Value) << "\"";
511         } else {
512           OS << "uses incorrect expression \"";
513           OS.write_escaped(Var) << "\"";
514         }
515       } else {
516         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
517 
518         // Check for undefined variable references.
519         if (it == VariableTable.end()) {
520           OS << "uses undefined variable \"";
521           OS.write_escaped(Var) << "\"";
522         } else {
523           OS << "with variable \"";
524           OS.write_escaped(Var) << "\" equal to \"";
525           OS.write_escaped(it->second) << "\"";
526         }
527       }
528 
529       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
530                       OS.str());
531     }
532   }
533 
534   // Attempt to find the closest/best fuzzy match.  Usually an error happens
535   // because some string in the output didn't exactly match. In these cases, we
536   // would like to show the user a best guess at what "should have" matched, to
537   // save them having to actually check the input manually.
538   size_t NumLinesForward = 0;
539   size_t Best = StringRef::npos;
540   double BestQuality = 0;
541 
542   // Use an arbitrary 4k limit on how far we will search.
543   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
544     if (Buffer[i] == '\n')
545       ++NumLinesForward;
546 
547     // Patterns have leading whitespace stripped, so skip whitespace when
548     // looking for something which looks like a pattern.
549     if (Buffer[i] == ' ' || Buffer[i] == '\t')
550       continue;
551 
552     // Compute the "quality" of this match as an arbitrary combination of the
553     // match distance and the number of lines skipped to get to this match.
554     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
555     double Quality = Distance + (NumLinesForward / 100.);
556 
557     if (Quality < BestQuality || Best == StringRef::npos) {
558       Best = i;
559       BestQuality = Quality;
560     }
561   }
562 
563   // Print the "possible intended match here" line if we found something
564   // reasonable and not equal to what we showed in the "scanning from here"
565   // line.
566   if (Best && Best != StringRef::npos && BestQuality < 50) {
567     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
568                     SourceMgr::DK_Note, "possible intended match here");
569 
570     // FIXME: If we wanted to be really friendly we would show why the match
571     // failed, as it can be hard to spot simple one character differences.
572   }
573 }
574 
575 /// Finds the closing sequence of a regex variable usage or definition.
576 ///
577 /// \p Str has to point in the beginning of the definition (right after the
578 /// opening sequence). Returns the offset of the closing sequence within Str,
579 /// or npos if it was not found.
580 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
581   // Offset keeps track of the current offset within the input Str
582   size_t Offset = 0;
583   // [...] Nesting depth
584   size_t BracketDepth = 0;
585 
586   while (!Str.empty()) {
587     if (Str.startswith("]]") && BracketDepth == 0)
588       return Offset;
589     if (Str[0] == '\\') {
590       // Backslash escapes the next char within regexes, so skip them both.
591       Str = Str.substr(2);
592       Offset += 2;
593     } else {
594       switch (Str[0]) {
595       default:
596         break;
597       case '[':
598         BracketDepth++;
599         break;
600       case ']':
601         if (BracketDepth == 0) {
602           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
603                           SourceMgr::DK_Error,
604                           "missing closing \"]\" for regex variable");
605           exit(1);
606         }
607         BracketDepth--;
608         break;
609       }
610       Str = Str.substr(1);
611       Offset++;
612     }
613   }
614 
615   return StringRef::npos;
616 }
617 
618 //===----------------------------------------------------------------------===//
619 // Check Strings.
620 //===----------------------------------------------------------------------===//
621 
622 /// A check that we found in the input file.
623 struct CheckString {
624   /// The pattern to match.
625   Pattern Pat;
626 
627   /// Which prefix name this check matched.
628   StringRef Prefix;
629 
630   /// The location in the match file that the check string was specified.
631   SMLoc Loc;
632 
633   /// All of the strings that are disallowed from occurring between this match
634   /// string and the previous one (or start of file).
635   std::vector<Pattern> DagNotStrings;
636 
637   CheckString(const Pattern &P, StringRef S, SMLoc L)
638       : Pat(P), Prefix(S), Loc(L) {}
639 
640   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
641                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
642 
643   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
644   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
645   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
646                 const std::vector<const Pattern *> &NotStrings,
647                 StringMap<StringRef> &VariableTable) const;
648   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
649                   std::vector<const Pattern *> &NotStrings,
650                   StringMap<StringRef> &VariableTable) const;
651 };
652 
653 /// Canonicalize whitespaces in the file. Line endings are replaced with
654 /// UNIX-style '\n'.
655 static StringRef CanonicalizeFile(MemoryBuffer &MB,
656                                   SmallVectorImpl<char> &OutputBuffer) {
657   OutputBuffer.reserve(MB.getBufferSize());
658 
659   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
660        Ptr != End; ++Ptr) {
661     // Eliminate trailing dosish \r.
662     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
663       continue;
664     }
665 
666     // If current char is not a horizontal whitespace or if horizontal
667     // whitespace canonicalization is disabled, dump it to output as is.
668     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
669       OutputBuffer.push_back(*Ptr);
670       continue;
671     }
672 
673     // Otherwise, add one space and advance over neighboring space.
674     OutputBuffer.push_back(' ');
675     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
676       ++Ptr;
677   }
678 
679   // Add a null byte and then return all but that byte.
680   OutputBuffer.push_back('\0');
681   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
682 }
683 
/// Returns true if \p c can be part of a check-like word: an alphanumeric
/// character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // isalnum is undefined for negative arguments other than EOF, so convert
  // through unsigned char before classifying.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
687 
688 // Get the size of the prefix extension.
689 static size_t CheckTypeSize(Check::CheckType Ty) {
690   switch (Ty) {
691   case Check::CheckNone:
692   case Check::CheckBadNot:
693     return 0;
694 
695   case Check::CheckPlain:
696     return sizeof(":") - 1;
697 
698   case Check::CheckNext:
699     return sizeof("-NEXT:") - 1;
700 
701   case Check::CheckSame:
702     return sizeof("-SAME:") - 1;
703 
704   case Check::CheckNot:
705     return sizeof("-NOT:") - 1;
706 
707   case Check::CheckDAG:
708     return sizeof("-DAG:") - 1;
709 
710   case Check::CheckLabel:
711     return sizeof("-LABEL:") - 1;
712 
713   case Check::CheckEOF:
714     llvm_unreachable("Should not be using EOF size");
715   }
716 
717   llvm_unreachable("Bad check type");
718 }
719 
720 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
721   if (Buffer.size() <= Prefix.size())
722     return Check::CheckNone;
723 
724   char NextChar = Buffer[Prefix.size()];
725 
726   // Verify that the : is present after the prefix.
727   if (NextChar == ':')
728     return Check::CheckPlain;
729 
730   if (NextChar != '-')
731     return Check::CheckNone;
732 
733   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
734   if (Rest.startswith("NEXT:"))
735     return Check::CheckNext;
736 
737   if (Rest.startswith("SAME:"))
738     return Check::CheckSame;
739 
740   if (Rest.startswith("NOT:"))
741     return Check::CheckNot;
742 
743   if (Rest.startswith("DAG:"))
744     return Check::CheckDAG;
745 
746   if (Rest.startswith("LABEL:"))
747     return Check::CheckLabel;
748 
749   // You can't combine -NOT with another suffix.
750   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
751       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
752       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
753     return Check::CheckBadNot;
754 
755   return Check::CheckNone;
756 }
757 
758 // From the given position, find the next character after the word.
759 static size_t SkipWord(StringRef Str, size_t Loc) {
760   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
761     ++Loc;
762   return Loc;
763 }
764 
765 /// Search the buffer for the first prefix in the prefix regular expression.
766 ///
767 /// This searches the buffer using the provided regular expression, however it
768 /// enforces constraints beyond that:
769 /// 1) The found prefix must not be a suffix of something that looks like
770 ///    a valid prefix.
771 /// 2) The found prefix must be followed by a valid check type suffix using \c
772 ///    FindCheckType above.
773 ///
774 /// The first match of the regular expression to satisfy these two is returned,
775 /// otherwise an empty StringRef is returned to indicate failure.
776 ///
777 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
778 /// start at the beginning of the returned prefix, increment \p LineNumber for
779 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
780 /// check found by examining the suffix.
781 ///
782 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
783 /// is unspecified.
784 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
785                                          unsigned &LineNumber,
786                                          Check::CheckType &CheckTy) {
787   SmallVector<StringRef, 2> Matches;
788 
789   while (!Buffer.empty()) {
790     // Find the first (longest) match using the RE.
791     if (!PrefixRE.match(Buffer, &Matches))
792       // No match at all, bail.
793       return StringRef();
794 
795     StringRef Prefix = Matches[0];
796     Matches.clear();
797 
798     assert(Prefix.data() >= Buffer.data() &&
799            Prefix.data() < Buffer.data() + Buffer.size() &&
800            "Prefix doesn't start inside of buffer!");
801     size_t Loc = Prefix.data() - Buffer.data();
802     StringRef Skipped = Buffer.substr(0, Loc);
803     Buffer = Buffer.drop_front(Loc);
804     LineNumber += Skipped.count('\n');
805 
806     // Check that the matched prefix isn't a suffix of some other check-like
807     // word.
808     // FIXME: This is a very ad-hoc check. it would be better handled in some
809     // other way. Among other things it seems hard to distinguish between
810     // intentional and unintentional uses of this feature.
811     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
812       // Now extract the type.
813       CheckTy = FindCheckType(Buffer, Prefix);
814 
815       // If we've found a valid check type for this prefix, we're done.
816       if (CheckTy != Check::CheckNone)
817         return Prefix;
818     }
819 
820     // If we didn't successfully find a prefix, we need to skip this invalid
821     // prefix and continue scanning. We directly skip the prefix that was
822     // matched and any additional parts of that check-like word.
823     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
824   }
825 
826   // We ran out of buffer while skipping partial matches so give up.
827   return StringRef();
828 }
829 
830 /// Read the check file, which specifies the sequence of expected strings.
831 ///
832 /// The strings are added to the CheckStrings vector. Returns true in case of
833 /// an error, false otherwise.
834 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
835                           std::vector<CheckString> &CheckStrings) {
836   std::vector<Pattern> ImplicitNegativeChecks;
837   for (const auto &PatternString : ImplicitCheckNot) {
838     // Create a buffer with fake command line content in order to display the
839     // command line option responsible for the specific implicit CHECK-NOT.
840     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
841     std::string Suffix = "'";
842     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
843         Prefix + PatternString + Suffix, "command line");
844 
845     StringRef PatternInBuffer =
846         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
847     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
848 
849     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
850     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
851                                                "IMPLICIT-CHECK", SM, 0);
852   }
853 
854   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
855 
856   // LineNumber keeps track of the line on which CheckPrefix instances are
857   // found.
858   unsigned LineNumber = 1;
859 
860   while (1) {
861     Check::CheckType CheckTy;
862 
863     // See if a prefix occurs in the memory buffer.
864     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
865                                                    CheckTy);
866     if (UsedPrefix.empty())
867       break;
868     assert(UsedPrefix.data() == Buffer.data() &&
869            "Failed to move Buffer's start forward, or pointed prefix outside "
870            "of the buffer!");
871 
872     // Location to use for error messages.
873     const char *UsedPrefixStart = UsedPrefix.data();
874 
875     // Skip the buffer to the end.
876     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
877 
878     // Complain about useful-looking but unsupported suffixes.
879     if (CheckTy == Check::CheckBadNot) {
880       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
881                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
882       return true;
883     }
884 
885     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
886     // leading whitespace.
887     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
888       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
889 
890     // Scan ahead to the end of line.
891     size_t EOL = Buffer.find_first_of("\n\r");
892 
893     // Remember the location of the start of the pattern, for diagnostics.
894     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
895 
896     // Parse the pattern.
897     Pattern P(CheckTy);
898     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
899       return true;
900 
901     // Verify that CHECK-LABEL lines do not define or use variables
902     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
903       SM.PrintMessage(
904           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
905           "found '" + UsedPrefix + "-LABEL:'"
906                                    " with variable definition or use");
907       return true;
908     }
909 
910     Buffer = Buffer.substr(EOL);
911 
912     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
913     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
914         CheckStrings.empty()) {
915       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
916       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
917                       SourceMgr::DK_Error,
918                       "found '" + UsedPrefix + "-" + Type +
919                           "' without previous '" + UsedPrefix + ": line");
920       return true;
921     }
922 
923     // Handle CHECK-DAG/-NOT.
924     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
925       DagNotMatches.push_back(P);
926       continue;
927     }
928 
929     // Okay, add the string we captured to the output vector and move on.
930     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
931     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
932     DagNotMatches = ImplicitNegativeChecks;
933   }
934 
935   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
936   // prefix as a filler for the error message.
937   if (!DagNotMatches.empty()) {
938     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
939                               SMLoc::getFromPointer(Buffer.data()));
940     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
941   }
942 
943   if (CheckStrings.empty()) {
944     errs() << "error: no check strings found with prefix"
945            << (CheckPrefixes.size() > 1 ? "es " : " ");
946     prefix_iterator I = CheckPrefixes.begin();
947     prefix_iterator E = CheckPrefixes.end();
948     if (I != E) {
949       errs() << "\'" << *I << ":'";
950       ++I;
951     }
952     for (; I != E; ++I)
953       errs() << ", \'" << *I << ":'";
954 
955     errs() << '\n';
956     return true;
957   }
958 
959   return false;
960 }
961 
962 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
963                              StringRef Buffer,
964                              StringMap<StringRef> &VariableTable) {
965   // Otherwise, we have an error, emit an error message.
966   SM.PrintMessage(Loc, SourceMgr::DK_Error,
967                   "expected string not found in input");
968 
969   // Print the "scanning from here" line.  If the current position is at the
970   // end of a line, advance to the start of the next line.
971   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
972 
973   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
974                   "scanning from here");
975 
976   // Allow the pattern to print additional information if desired.
977   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
978 }
979 
980 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
981                              StringRef Buffer,
982                              StringMap<StringRef> &VariableTable) {
983   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
984 }
985 
986 /// Count the number of newlines in the specified range.
987 static unsigned CountNumNewlinesBetween(StringRef Range,
988                                         const char *&FirstNewLine) {
989   unsigned NumNewLines = 0;
990   while (1) {
991     // Scan for newline.
992     Range = Range.substr(Range.find_first_of("\n\r"));
993     if (Range.empty())
994       return NumNewLines;
995 
996     ++NumNewLines;
997 
998     // Handle \n\r and \r\n as a single newline.
999     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1000         (Range[0] != Range[1]))
1001       Range = Range.substr(1);
1002     Range = Range.substr(1);
1003 
1004     if (NumNewLines == 1)
1005       FirstNewLine = Range.begin();
1006   }
1007 }
1008 
1009 /// Match check string and its "not strings" and/or "dag strings".
1010 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1011                           bool IsLabelScanMode, size_t &MatchLen,
1012                           StringMap<StringRef> &VariableTable) const {
1013   size_t LastPos = 0;
1014   std::vector<const Pattern *> NotStrings;
1015 
1016   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1017   // bounds; we have not processed variable definitions within the bounded block
1018   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1019   // over the block again (including the last CHECK-LABEL) in normal mode.
1020   if (!IsLabelScanMode) {
1021     // Match "dag strings" (with mixed "not strings" if any).
1022     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1023     if (LastPos == StringRef::npos)
1024       return StringRef::npos;
1025   }
1026 
1027   // Match itself from the last position after matching CHECK-DAG.
1028   StringRef MatchBuffer = Buffer.substr(LastPos);
1029   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1030   if (MatchPos == StringRef::npos) {
1031     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1032     return StringRef::npos;
1033   }
1034 
1035   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1036   // or CHECK-NOT
1037   if (!IsLabelScanMode) {
1038     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1039 
1040     // If this check is a "CHECK-NEXT", verify that the previous match was on
1041     // the previous line (i.e. that there is one newline between them).
1042     if (CheckNext(SM, SkippedRegion))
1043       return StringRef::npos;
1044 
1045     // If this check is a "CHECK-SAME", verify that the previous match was on
1046     // the same line (i.e. that there is no newline between them).
1047     if (CheckSame(SM, SkippedRegion))
1048       return StringRef::npos;
1049 
1050     // If this match had "not strings", verify that they don't exist in the
1051     // skipped region.
1052     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1053       return StringRef::npos;
1054   }
1055 
1056   return LastPos + MatchPos;
1057 }
1058 
1059 /// Verify there is a single line in the given buffer.
1060 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1061   if (Pat.getCheckTy() != Check::CheckNext)
1062     return false;
1063 
1064   // Count the number of newlines between the previous match and this one.
1065   assert(Buffer.data() !=
1066              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1067                                     SMLoc::getFromPointer(Buffer.data())))
1068                  ->getBufferStart() &&
1069          "CHECK-NEXT can't be the first check in a file");
1070 
1071   const char *FirstNewLine = nullptr;
1072   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1073 
1074   if (NumNewLines == 0) {
1075     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1076                     Prefix + "-NEXT: is on the same line as previous match");
1077     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1078                     "'next' match was here");
1079     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1080                     "previous match ended here");
1081     return true;
1082   }
1083 
1084   if (NumNewLines != 1) {
1085     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1086                     Prefix +
1087                         "-NEXT: is not on the line after the previous match");
1088     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1089                     "'next' match was here");
1090     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1091                     "previous match ended here");
1092     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1093                     "non-matching line after previous match is here");
1094     return true;
1095   }
1096 
1097   return false;
1098 }
1099 
1100 /// Verify there is no newline in the given buffer.
1101 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1102   if (Pat.getCheckTy() != Check::CheckSame)
1103     return false;
1104 
1105   // Count the number of newlines between the previous match and this one.
1106   assert(Buffer.data() !=
1107              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1108                                     SMLoc::getFromPointer(Buffer.data())))
1109                  ->getBufferStart() &&
1110          "CHECK-SAME can't be the first check in a file");
1111 
1112   const char *FirstNewLine = nullptr;
1113   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1114 
1115   if (NumNewLines != 0) {
1116     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1117                     Prefix +
1118                         "-SAME: is not on the same line as the previous match");
1119     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1120                     "'next' match was here");
1121     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1122                     "previous match ended here");
1123     return true;
1124   }
1125 
1126   return false;
1127 }
1128 
1129 /// Verify there's no "not strings" in the given buffer.
1130 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1131                            const std::vector<const Pattern *> &NotStrings,
1132                            StringMap<StringRef> &VariableTable) const {
1133   for (const Pattern *Pat : NotStrings) {
1134     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1135 
1136     size_t MatchLen = 0;
1137     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1138 
1139     if (Pos == StringRef::npos)
1140       continue;
1141 
1142     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1143                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1144     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1145                     Prefix + "-NOT: pattern specified here");
1146     return true;
1147   }
1148 
1149   return false;
1150 }
1151 
1152 /// Match "dag strings" and their mixed "not strings".
1153 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1154                              std::vector<const Pattern *> &NotStrings,
1155                              StringMap<StringRef> &VariableTable) const {
1156   if (DagNotStrings.empty())
1157     return 0;
1158 
1159   size_t LastPos = 0;
1160   size_t StartPos = LastPos;
1161 
1162   for (const Pattern &Pat : DagNotStrings) {
1163     assert((Pat.getCheckTy() == Check::CheckDAG ||
1164             Pat.getCheckTy() == Check::CheckNot) &&
1165            "Invalid CHECK-DAG or CHECK-NOT!");
1166 
1167     if (Pat.getCheckTy() == Check::CheckNot) {
1168       NotStrings.push_back(&Pat);
1169       continue;
1170     }
1171 
1172     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1173 
1174     size_t MatchLen = 0, MatchPos;
1175 
1176     // CHECK-DAG always matches from the start.
1177     StringRef MatchBuffer = Buffer.substr(StartPos);
1178     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1179     // With a group of CHECK-DAGs, a single mismatching means the match on
1180     // that group of CHECK-DAGs fails immediately.
1181     if (MatchPos == StringRef::npos) {
1182       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1183       return StringRef::npos;
1184     }
1185     // Re-calc it as the offset relative to the start of the original string.
1186     MatchPos += StartPos;
1187 
1188     if (!NotStrings.empty()) {
1189       if (MatchPos < LastPos) {
1190         // Reordered?
1191         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1192                         SourceMgr::DK_Error,
1193                         Prefix + "-DAG: found a match of CHECK-DAG"
1194                                  " reordering across a CHECK-NOT");
1195         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1196                         SourceMgr::DK_Note,
1197                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1198                                  " is found here");
1199         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1200                         Prefix + "-NOT: the crossed pattern specified"
1201                                  " here");
1202         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1203                         Prefix + "-DAG: the reordered pattern specified"
1204                                  " here");
1205         return StringRef::npos;
1206       }
1207       // All subsequent CHECK-DAGs should be matched from the farthest
1208       // position of all precedent CHECK-DAGs (including this one.)
1209       StartPos = LastPos;
1210       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1211       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1212       // region.
1213       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1214       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1215         return StringRef::npos;
1216       // Clear "not strings".
1217       NotStrings.clear();
1218     }
1219 
1220     // Update the last position with CHECK-DAG matches.
1221     LastPos = std::max(MatchPos + MatchLen, LastPos);
1222   }
1223 
1224   return LastPos;
1225 }
1226 
1227 // A check prefix must contain only alphanumeric, hyphens and underscores.
1228 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1229   Regex Validator("^[a-zA-Z0-9_-]*$");
1230   return Validator.match(CheckPrefix);
1231 }
1232 
1233 static bool ValidateCheckPrefixes() {
1234   StringSet<> PrefixSet;
1235 
1236   for (StringRef Prefix : CheckPrefixes) {
1237     // Reject empty prefixes.
1238     if (Prefix == "")
1239       return false;
1240 
1241     if (!PrefixSet.insert(Prefix).second)
1242       return false;
1243 
1244     if (!ValidateCheckPrefix(Prefix))
1245       return false;
1246   }
1247 
1248   return true;
1249 }
1250 
1251 // Combines the check prefixes into a single regex so that we can efficiently
1252 // scan for any of the set.
1253 //
1254 // The semantics are that the longest-match wins which matches our regex
1255 // library.
1256 static Regex buildCheckPrefixRegex() {
1257   // I don't think there's a way to specify an initial value for cl::list,
1258   // so if nothing was specified, add the default
1259   if (CheckPrefixes.empty())
1260     CheckPrefixes.push_back("CHECK");
1261 
1262   // We already validated the contents of CheckPrefixes so just concatenate
1263   // them as alternatives.
1264   SmallString<32> PrefixRegexStr;
1265   for (StringRef Prefix : CheckPrefixes) {
1266     if (Prefix != CheckPrefixes.front())
1267       PrefixRegexStr.push_back('|');
1268 
1269     PrefixRegexStr.append(Prefix);
1270   }
1271 
1272   return Regex(PrefixRegexStr);
1273 }
1274 
1275 static void DumpCommandLine(int argc, char **argv) {
1276   errs() << "FileCheck command line: ";
1277   for (int I = 0; I < argc; I++)
1278     errs() << " " << argv[I];
1279   errs() << "\n";
1280 }
1281 
1282 // Remove local variables from \p VariableTable. Global variables
1283 // (start with '$') are preserved.
1284 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1285   SmallVector<StringRef, 16> LocalVars;
1286   for (const auto &Var : VariableTable)
1287     if (Var.first()[0] != '$')
1288       LocalVars.push_back(Var.first());
1289 
1290   for (const auto &Var : LocalVars)
1291     VariableTable.erase(Var);
1292 }
1293 
1294 /// Check the input to FileCheck provided in the \p Buffer against the \p
1295 /// CheckStrings read from the check file.
1296 ///
1297 /// Returns false if the input fails to satisfy the checks.
1298 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1299                 ArrayRef<CheckString> CheckStrings) {
1300   bool ChecksFailed = false;
1301 
1302   /// VariableTable - This holds all the current filecheck variables.
1303   StringMap<StringRef> VariableTable;
1304 
1305   for (const auto& Def : GlobalDefines)
1306     VariableTable.insert(StringRef(Def).split('='));
1307 
1308   unsigned i = 0, j = 0, e = CheckStrings.size();
1309   while (true) {
1310     StringRef CheckRegion;
1311     if (j == e) {
1312       CheckRegion = Buffer;
1313     } else {
1314       const CheckString &CheckLabelStr = CheckStrings[j];
1315       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1316         ++j;
1317         continue;
1318       }
1319 
1320       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1321       size_t MatchLabelLen = 0;
1322       size_t MatchLabelPos =
1323           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1324       if (MatchLabelPos == StringRef::npos)
1325         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1326         return false;
1327 
1328       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1329       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1330       ++j;
1331     }
1332 
1333     if (EnableVarScope)
1334       ClearLocalVars(VariableTable);
1335 
1336     for (; i != j; ++i) {
1337       const CheckString &CheckStr = CheckStrings[i];
1338 
1339       // Check each string within the scanned region, including a second check
1340       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1341       size_t MatchLen = 0;
1342       size_t MatchPos =
1343           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1344 
1345       if (MatchPos == StringRef::npos) {
1346         ChecksFailed = true;
1347         i = j;
1348         break;
1349       }
1350 
1351       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1352     }
1353 
1354     if (j == e)
1355       break;
1356   }
1357 
1358   // Success if no checks failed.
1359   return !ChecksFailed;
1360 }
1361 
1362 int main(int argc, char **argv) {
1363   sys::PrintStackTraceOnErrorSignal(argv[0]);
1364   PrettyStackTraceProgram X(argc, argv);
1365   cl::ParseCommandLineOptions(argc, argv);
1366 
1367   if (!ValidateCheckPrefixes()) {
1368     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1369               "start with a letter and contain only alphanumeric characters, "
1370               "hyphens and underscores\n";
1371     return 2;
1372   }
1373 
1374   Regex PrefixRE = buildCheckPrefixRegex();
1375   std::string REError;
1376   if (!PrefixRE.isValid(REError)) {
1377     errs() << "Unable to combine check-prefix strings into a prefix regular "
1378               "expression! This is likely a bug in FileCheck's verification of "
1379               "the check-prefix strings. Regular expression parsing failed "
1380               "with the following error: "
1381            << REError << "\n";
1382     return 2;
1383   }
1384 
1385   SourceMgr SM;
1386 
1387   // Read the expected strings from the check file.
1388   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1389       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1390   if (std::error_code EC = CheckFileOrErr.getError()) {
1391     errs() << "Could not open check file '" << CheckFilename
1392            << "': " << EC.message() << '\n';
1393     return 2;
1394   }
1395   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1396 
1397   SmallString<4096> CheckFileBuffer;
1398   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1399 
1400   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1401                             CheckFileText, CheckFile.getBufferIdentifier()),
1402                         SMLoc());
1403 
1404   std::vector<CheckString> CheckStrings;
1405   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1406     return 2;
1407 
1408   // Open the file to check and add it to SourceMgr.
1409   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1410       MemoryBuffer::getFileOrSTDIN(InputFilename);
1411   if (std::error_code EC = InputFileOrErr.getError()) {
1412     errs() << "Could not open input file '" << InputFilename
1413            << "': " << EC.message() << '\n';
1414     return 2;
1415   }
1416   MemoryBuffer &InputFile = *InputFileOrErr.get();
1417 
1418   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1419     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1420     DumpCommandLine(argc, argv);
1421     return 2;
1422   }
1423 
1424   SmallString<4096> InputFileBuffer;
1425   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1426 
1427   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1428                             InputFileText, InputFile.getBufferIdentifier()),
1429                         SMLoc());
1430 
1431   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1432 }
1433