xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision cf60ab313a33b5e6a480311a9d5d2765b03c384d)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an exit status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
// Command-line interface. Every option below is a file-local global that the
// rest of the tool reads directly.

// Positional, required argument: the check file.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

// -input-file: the file to check. Initialized to "-", which the option
// description documents as meaning stdin.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

// -check-prefix: may be given several times; each value is one prefix to look
// for in the check file.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// -check-prefixes: alias of -check-prefix that additionally accepts a
// comma-separated list of values.
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));

// -strict-whitespace: note the inverted sense of the variable name -- when
// set, horizontal whitespace is NOT canonicalized.
static cl::opt<bool> NoCanonicalizeWhiteSpace(
    "strict-whitespace",
    cl::desc("Do not treat all horizontal whitespace as equivalent"));

// -implicit-check-not: patterns that are turned into implicit negative checks.
static cl::list<std::string> ImplicitCheckNot(
    "implicit-check-not",
    cl::desc("Add an implicit negative check with this pattern to every\n"
             "positive check. This can be used to ensure that no instances of\n"
             "this pattern occur which are not matched by a positive pattern"),
    cl::value_desc("pattern"));

// -allow-empty: off by default.
static cl::opt<bool> AllowEmptyInput(
    "allow-empty", cl::init(false),
    cl::desc("Allow the input file to be empty. This is useful when making\n"
             "checks that some error message does not occur, for example."));

// -match-full-lines: off by default. Consulted by Pattern::ParsePattern, which
// anchors the generated regex with '^' and '$' for non-NOT checks.
static cl::opt<bool> MatchFullLines(
    "match-full-lines", cl::init(false),
    cl::desc("Require all positive matches to cover an entire input line.\n"
             "Allows leading and trailing whitespace if --strict-whitespace\n"
             "is not also passed."));

// -enable-var-scope: off by default.
static cl::opt<bool> EnableVarScope(
    "enable-var-scope", cl::init(false),
    cl::desc("Enables scope for regex variables. Variables with names that\n"
             "do not start with '$' will be reset at the beginning of\n"
             "each CHECK-LABEL block."));

// Iterator type over the values of a string-list option such as CheckPrefixes.
typedef cl::list<std::string>::const_iterator prefix_iterator;
83 
84 //===----------------------------------------------------------------------===//
85 // Pattern Handling Code.
86 //===----------------------------------------------------------------------===//
87 
namespace Check {
/// The kind of check directive found after a check prefix. FindCheckType maps
/// the text following the prefix to one of these values.
enum CheckType {
  /// No valid directive follows the prefix.
  CheckNone = 0,
  /// "PREFIX:"
  CheckPlain,
  /// "PREFIX-NEXT:"
  CheckNext,
  /// "PREFIX-SAME:"
  CheckSame,
  /// "PREFIX-NOT:"
  CheckNot,
  /// "PREFIX-DAG:"
  CheckDAG,
  /// "PREFIX-LABEL:"
  CheckLabel,

  /// Indicates the pattern only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  CheckEOF,

  /// Marks when parsing found a -NOT check combined with another CHECK suffix.
  CheckBadNot
};
}
106 
107 class Pattern {
108   SMLoc PatternLoc;
109 
110   /// A fixed string to match as the pattern or empty if this pattern requires
111   /// a regex match.
112   StringRef FixedStr;
113 
114   /// A regex string to match as the pattern or empty if this pattern requires
115   /// a fixed string to match.
116   std::string RegExStr;
117 
118   /// Entries in this vector map to uses of a variable in the pattern, e.g.
119   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
120   /// we'll get an entry in this vector that tells us to insert the value of
121   /// bar at offset 3.
122   std::vector<std::pair<StringRef, unsigned>> VariableUses;
123 
124   /// Maps definitions of variables to their parenthesized capture numbers.
125   ///
126   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
127   /// 1.
128   std::map<StringRef, unsigned> VariableDefs;
129 
130   Check::CheckType CheckTy;
131 
132   /// Contains the number of line this pattern is in.
133   unsigned LineNumber;
134 
135 public:
136   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
137 
138   /// Returns the location in source code.
139   SMLoc getLoc() const { return PatternLoc; }
140 
141   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
142                     unsigned LineNumber);
143   size_t Match(StringRef Buffer, size_t &MatchLen,
144                StringMap<StringRef> &VariableTable) const;
145   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
146                         const StringMap<StringRef> &VariableTable) const;
147 
148   bool hasVariable() const {
149     return !(VariableUses.empty() && VariableDefs.empty());
150   }
151 
152   Check::CheckType getCheckTy() const { return CheckTy; }
153 
154 private:
155   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
156   void AddBackrefToRegEx(unsigned BackrefNum);
157   unsigned
158   ComputeMatchDistance(StringRef Buffer,
159                        const StringMap<StringRef> &VariableTable) const;
160   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
161   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
162 };
163 
164 /// Parses the given string into the Pattern.
165 ///
166 /// \p Prefix provides which prefix is being matched, \p SM provides the
167 /// SourceMgr used for error reports, and \p LineNumber is the line number in
168 /// the input file from which the pattern string was read. Returns true in
169 /// case of an error, false otherwise.
170 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
171                            SourceMgr &SM, unsigned LineNumber) {
172   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
173 
174   this->LineNumber = LineNumber;
175   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
176 
177   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
178     // Ignore trailing whitespace.
179     while (!PatternStr.empty() &&
180            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
181       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
182 
183   // Check that there is something on the line.
184   if (PatternStr.empty()) {
185     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
186                     "found empty check string with prefix '" + Prefix + ":'");
187     return true;
188   }
189 
190   // Check to see if this is a fixed string, or if it has regex pieces.
191   if (!MatchFullLinesHere &&
192       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
193                                  PatternStr.find("[[") == StringRef::npos))) {
194     FixedStr = PatternStr;
195     return false;
196   }
197 
198   if (MatchFullLinesHere) {
199     RegExStr += '^';
200     if (!NoCanonicalizeWhiteSpace)
201       RegExStr += " *";
202   }
203 
204   // Paren value #0 is for the fully matched string.  Any new parenthesized
205   // values add from there.
206   unsigned CurParen = 1;
207 
208   // Otherwise, there is at least one regex piece.  Build up the regex pattern
209   // by escaping scary characters in fixed strings, building up one big regex.
210   while (!PatternStr.empty()) {
211     // RegEx matches.
212     if (PatternStr.startswith("{{")) {
213       // This is the start of a regex match.  Scan for the }}.
214       size_t End = PatternStr.find("}}");
215       if (End == StringRef::npos) {
216         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
217                         SourceMgr::DK_Error,
218                         "found start of regex string with no end '}}'");
219         return true;
220       }
221 
222       // Enclose {{}} patterns in parens just like [[]] even though we're not
223       // capturing the result for any purpose.  This is required in case the
224       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
225       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
226       RegExStr += '(';
227       ++CurParen;
228 
229       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
230         return true;
231       RegExStr += ')';
232 
233       PatternStr = PatternStr.substr(End + 2);
234       continue;
235     }
236 
237     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
238     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
239     // second form is [[foo]] which is a reference to foo.  The variable name
240     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
241     // it.  This is to catch some common errors.
242     if (PatternStr.startswith("[[")) {
243       // Find the closing bracket pair ending the match.  End is going to be an
244       // offset relative to the beginning of the match string.
245       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
246 
247       if (End == StringRef::npos) {
248         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
249                         SourceMgr::DK_Error,
250                         "invalid named regex reference, no ]] found");
251         return true;
252       }
253 
254       StringRef MatchStr = PatternStr.substr(2, End);
255       PatternStr = PatternStr.substr(End + 4);
256 
257       // Get the regex name (e.g. "foo").
258       size_t NameEnd = MatchStr.find(':');
259       StringRef Name = MatchStr.substr(0, NameEnd);
260 
261       if (Name.empty()) {
262         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
263                         "invalid name in named regex: empty name");
264         return true;
265       }
266 
267       // Verify that the name/expression is well formed. FileCheck currently
268       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
269       // is relaxed, more strict check is performed in \c EvaluateExpression.
270       bool IsExpression = false;
271       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
272         if (i == 0) {
273           if (Name[i] == '$')  // Global vars start with '$'
274             continue;
275           if (Name[i] == '@') {
276             if (NameEnd != StringRef::npos) {
277               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
278                               SourceMgr::DK_Error,
279                               "invalid name in named regex definition");
280               return true;
281             }
282             IsExpression = true;
283             continue;
284           }
285         }
286         if (Name[i] != '_' && !isalnum(Name[i]) &&
287             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
288           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
289                           SourceMgr::DK_Error, "invalid name in named regex");
290           return true;
291         }
292       }
293 
294       // Name can't start with a digit.
295       if (isdigit(static_cast<unsigned char>(Name[0]))) {
296         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
297                         "invalid name in named regex");
298         return true;
299       }
300 
301       // Handle [[foo]].
302       if (NameEnd == StringRef::npos) {
303         // Handle variables that were defined earlier on the same line by
304         // emitting a backreference.
305         if (VariableDefs.find(Name) != VariableDefs.end()) {
306           unsigned VarParenNum = VariableDefs[Name];
307           if (VarParenNum < 1 || VarParenNum > 9) {
308             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
309                             SourceMgr::DK_Error,
310                             "Can't back-reference more than 9 variables");
311             return true;
312           }
313           AddBackrefToRegEx(VarParenNum);
314         } else {
315           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
316         }
317         continue;
318       }
319 
320       // Handle [[foo:.*]].
321       VariableDefs[Name] = CurParen;
322       RegExStr += '(';
323       ++CurParen;
324 
325       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
326         return true;
327 
328       RegExStr += ')';
329     }
330 
331     // Handle fixed string matches.
332     // Find the end, which is the start of the next regex.
333     size_t FixedMatchEnd = PatternStr.find("{{");
334     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
335     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
336     PatternStr = PatternStr.substr(FixedMatchEnd);
337   }
338 
339   if (MatchFullLinesHere) {
340     if (!NoCanonicalizeWhiteSpace)
341       RegExStr += " *";
342     RegExStr += '$';
343   }
344 
345   return false;
346 }
347 
348 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
349   Regex R(RS);
350   std::string Error;
351   if (!R.isValid(Error)) {
352     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
353                     "invalid regex: " + Error);
354     return true;
355   }
356 
357   RegExStr += RS.str();
358   CurParen += R.getNumMatches();
359   return false;
360 }
361 
362 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
363   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
364   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
365   RegExStr += Backref;
366 }
367 
368 /// Evaluates expression and stores the result to \p Value.
369 ///
370 /// Returns true on success and false when the expression has invalid syntax.
371 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
372   // The only supported expression is @LINE([\+-]\d+)?
373   if (!Expr.startswith("@LINE"))
374     return false;
375   Expr = Expr.substr(StringRef("@LINE").size());
376   int Offset = 0;
377   if (!Expr.empty()) {
378     if (Expr[0] == '+')
379       Expr = Expr.substr(1);
380     else if (Expr[0] != '-')
381       return false;
382     if (Expr.getAsInteger(10, Offset))
383       return false;
384   }
385   Value = llvm::itostr(LineNumber + Offset);
386   return true;
387 }
388 
389 /// Matches the pattern string against the input buffer \p Buffer
390 ///
391 /// This returns the position that is matched or npos if there is no match. If
392 /// there is a match, the size of the matched string is returned in \p
393 /// MatchLen.
394 ///
395 /// The \p VariableTable StringMap provides the current values of filecheck
396 /// variables and is updated if this match defines new values.
397 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
398                       StringMap<StringRef> &VariableTable) const {
399   // If this is the EOF pattern, match it immediately.
400   if (CheckTy == Check::CheckEOF) {
401     MatchLen = 0;
402     return Buffer.size();
403   }
404 
405   // If this is a fixed string pattern, just match it now.
406   if (!FixedStr.empty()) {
407     MatchLen = FixedStr.size();
408     return Buffer.find(FixedStr);
409   }
410 
411   // Regex match.
412 
413   // If there are variable uses, we need to create a temporary string with the
414   // actual value.
415   StringRef RegExToMatch = RegExStr;
416   std::string TmpStr;
417   if (!VariableUses.empty()) {
418     TmpStr = RegExStr;
419 
420     unsigned InsertOffset = 0;
421     for (const auto &VariableUse : VariableUses) {
422       std::string Value;
423 
424       if (VariableUse.first[0] == '@') {
425         if (!EvaluateExpression(VariableUse.first, Value))
426           return StringRef::npos;
427       } else {
428         StringMap<StringRef>::iterator it =
429             VariableTable.find(VariableUse.first);
430         // If the variable is undefined, return an error.
431         if (it == VariableTable.end())
432           return StringRef::npos;
433 
434         // Look up the value and escape it so that we can put it into the regex.
435         Value += Regex::escape(it->second);
436       }
437 
438       // Plop it into the regex at the adjusted offset.
439       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
440                     Value.begin(), Value.end());
441       InsertOffset += Value.size();
442     }
443 
444     // Match the newly constructed regex.
445     RegExToMatch = TmpStr;
446   }
447 
448   SmallVector<StringRef, 4> MatchInfo;
449   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
450     return StringRef::npos;
451 
452   // Successful regex match.
453   assert(!MatchInfo.empty() && "Didn't get any match");
454   StringRef FullMatch = MatchInfo[0];
455 
456   // If this defines any variables, remember their values.
457   for (const auto &VariableDef : VariableDefs) {
458     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
459     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
460   }
461 
462   MatchLen = FullMatch.size();
463   return FullMatch.data() - Buffer.data();
464 }
465 
466 
467 /// Computes an arbitrary estimate for the quality of matching this pattern at
468 /// the start of \p Buffer; a distance of zero should correspond to a perfect
469 /// match.
470 unsigned
471 Pattern::ComputeMatchDistance(StringRef Buffer,
472                               const StringMap<StringRef> &VariableTable) const {
473   // Just compute the number of matching characters. For regular expressions, we
474   // just compare against the regex itself and hope for the best.
475   //
476   // FIXME: One easy improvement here is have the regex lib generate a single
477   // example regular expression which matches, and use that as the example
478   // string.
479   StringRef ExampleString(FixedStr);
480   if (ExampleString.empty())
481     ExampleString = RegExStr;
482 
483   // Only compare up to the first line in the buffer, or the string size.
484   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
485   BufferPrefix = BufferPrefix.split('\n').first;
486   return BufferPrefix.edit_distance(ExampleString);
487 }
488 
489 /// Prints additional information about a failure to match involving this
490 /// pattern.
491 void Pattern::PrintFailureInfo(
492     const SourceMgr &SM, StringRef Buffer,
493     const StringMap<StringRef> &VariableTable) const {
494   // If this was a regular expression using variables, print the current
495   // variable values.
496   if (!VariableUses.empty()) {
497     for (const auto &VariableUse : VariableUses) {
498       SmallString<256> Msg;
499       raw_svector_ostream OS(Msg);
500       StringRef Var = VariableUse.first;
501       if (Var[0] == '@') {
502         std::string Value;
503         if (EvaluateExpression(Var, Value)) {
504           OS << "with expression \"";
505           OS.write_escaped(Var) << "\" equal to \"";
506           OS.write_escaped(Value) << "\"";
507         } else {
508           OS << "uses incorrect expression \"";
509           OS.write_escaped(Var) << "\"";
510         }
511       } else {
512         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
513 
514         // Check for undefined variable references.
515         if (it == VariableTable.end()) {
516           OS << "uses undefined variable \"";
517           OS.write_escaped(Var) << "\"";
518         } else {
519           OS << "with variable \"";
520           OS.write_escaped(Var) << "\" equal to \"";
521           OS.write_escaped(it->second) << "\"";
522         }
523       }
524 
525       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
526                       OS.str());
527     }
528   }
529 
530   // Attempt to find the closest/best fuzzy match.  Usually an error happens
531   // because some string in the output didn't exactly match. In these cases, we
532   // would like to show the user a best guess at what "should have" matched, to
533   // save them having to actually check the input manually.
534   size_t NumLinesForward = 0;
535   size_t Best = StringRef::npos;
536   double BestQuality = 0;
537 
538   // Use an arbitrary 4k limit on how far we will search.
539   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
540     if (Buffer[i] == '\n')
541       ++NumLinesForward;
542 
543     // Patterns have leading whitespace stripped, so skip whitespace when
544     // looking for something which looks like a pattern.
545     if (Buffer[i] == ' ' || Buffer[i] == '\t')
546       continue;
547 
548     // Compute the "quality" of this match as an arbitrary combination of the
549     // match distance and the number of lines skipped to get to this match.
550     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
551     double Quality = Distance + (NumLinesForward / 100.);
552 
553     if (Quality < BestQuality || Best == StringRef::npos) {
554       Best = i;
555       BestQuality = Quality;
556     }
557   }
558 
559   // Print the "possible intended match here" line if we found something
560   // reasonable and not equal to what we showed in the "scanning from here"
561   // line.
562   if (Best && Best != StringRef::npos && BestQuality < 50) {
563     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
564                     SourceMgr::DK_Note, "possible intended match here");
565 
566     // FIXME: If we wanted to be really friendly we would show why the match
567     // failed, as it can be hard to spot simple one character differences.
568   }
569 }
570 
571 /// Finds the closing sequence of a regex variable usage or definition.
572 ///
573 /// \p Str has to point in the beginning of the definition (right after the
574 /// opening sequence). Returns the offset of the closing sequence within Str,
575 /// or npos if it was not found.
576 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
577   // Offset keeps track of the current offset within the input Str
578   size_t Offset = 0;
579   // [...] Nesting depth
580   size_t BracketDepth = 0;
581 
582   while (!Str.empty()) {
583     if (Str.startswith("]]") && BracketDepth == 0)
584       return Offset;
585     if (Str[0] == '\\') {
586       // Backslash escapes the next char within regexes, so skip them both.
587       Str = Str.substr(2);
588       Offset += 2;
589     } else {
590       switch (Str[0]) {
591       default:
592         break;
593       case '[':
594         BracketDepth++;
595         break;
596       case ']':
597         if (BracketDepth == 0) {
598           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
599                           SourceMgr::DK_Error,
600                           "missing closing \"]\" for regex variable");
601           exit(1);
602         }
603         BracketDepth--;
604         break;
605       }
606       Str = Str.substr(1);
607       Offset++;
608     }
609   }
610 
611   return StringRef::npos;
612 }
613 
614 //===----------------------------------------------------------------------===//
615 // Check Strings.
616 //===----------------------------------------------------------------------===//
617 
/// A check that we found in the input file.
///
/// Bundles the parsed pattern with the prefix that introduced it, its source
/// location, and the patterns collected in DagNotStrings. The member
/// functions are only declared here; their definitions are not part of this
/// declaration.
struct CheckString {
  /// The pattern to match.
  Pattern Pat;

  /// Which prefix name this check matched.
  StringRef Prefix;

  /// The location in the match file that the check string was specified.
  SMLoc Loc;

  /// All of the strings that are disallowed from occurring between this match
  /// string and the previous one (or start of file).
  std::vector<Pattern> DagNotStrings;

  /// Stores copies of \p P, \p S and \p L; DagNotStrings starts out empty.
  CheckString(const Pattern &P, StringRef S, SMLoc L)
      : Pat(P), Prefix(S), Loc(L) {}

  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
               size_t &MatchLen, StringMap<StringRef> &VariableTable) const;

  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
                const std::vector<const Pattern *> &NotStrings,
                StringMap<StringRef> &VariableTable) const;
  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
                  std::vector<const Pattern *> &NotStrings,
                  StringMap<StringRef> &VariableTable) const;
};
648 
649 /// Canonicalize whitespaces in the file. Line endings are replaced with
650 /// UNIX-style '\n'.
651 static StringRef CanonicalizeFile(MemoryBuffer &MB,
652                                   SmallVectorImpl<char> &OutputBuffer) {
653   OutputBuffer.reserve(MB.getBufferSize());
654 
655   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
656        Ptr != End; ++Ptr) {
657     // Eliminate trailing dosish \r.
658     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
659       continue;
660     }
661 
662     // If current char is not a horizontal whitespace or if horizontal
663     // whitespace canonicalization is disabled, dump it to output as is.
664     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
665       OutputBuffer.push_back(*Ptr);
666       continue;
667     }
668 
669     // Otherwise, add one space and advance over neighboring space.
670     OutputBuffer.push_back(' ');
671     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
672       ++Ptr;
673   }
674 
675   // Add a null byte and then return all but that byte.
676   OutputBuffer.push_back('\0');
677   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
678 }
679 
/// Returns true if \p c can be part of a check-prefix-like word: an
/// alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // isalnum requires a value representable as unsigned char (or EOF); a
  // negative plain char, e.g. a byte of a UTF-8 sequence, would be undefined
  // behavior without the cast.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
683 
684 // Get the size of the prefix extension.
685 static size_t CheckTypeSize(Check::CheckType Ty) {
686   switch (Ty) {
687   case Check::CheckNone:
688   case Check::CheckBadNot:
689     return 0;
690 
691   case Check::CheckPlain:
692     return sizeof(":") - 1;
693 
694   case Check::CheckNext:
695     return sizeof("-NEXT:") - 1;
696 
697   case Check::CheckSame:
698     return sizeof("-SAME:") - 1;
699 
700   case Check::CheckNot:
701     return sizeof("-NOT:") - 1;
702 
703   case Check::CheckDAG:
704     return sizeof("-DAG:") - 1;
705 
706   case Check::CheckLabel:
707     return sizeof("-LABEL:") - 1;
708 
709   case Check::CheckEOF:
710     llvm_unreachable("Should not be using EOF size");
711   }
712 
713   llvm_unreachable("Bad check type");
714 }
715 
716 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
717   char NextChar = Buffer[Prefix.size()];
718 
719   // Verify that the : is present after the prefix.
720   if (NextChar == ':')
721     return Check::CheckPlain;
722 
723   if (NextChar != '-')
724     return Check::CheckNone;
725 
726   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
727   if (Rest.startswith("NEXT:"))
728     return Check::CheckNext;
729 
730   if (Rest.startswith("SAME:"))
731     return Check::CheckSame;
732 
733   if (Rest.startswith("NOT:"))
734     return Check::CheckNot;
735 
736   if (Rest.startswith("DAG:"))
737     return Check::CheckDAG;
738 
739   if (Rest.startswith("LABEL:"))
740     return Check::CheckLabel;
741 
742   // You can't combine -NOT with another suffix.
743   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
744       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
745       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
746     return Check::CheckBadNot;
747 
748   return Check::CheckNone;
749 }
750 
751 // From the given position, find the next character after the word.
752 static size_t SkipWord(StringRef Str, size_t Loc) {
753   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
754     ++Loc;
755   return Loc;
756 }
757 
758 /// Search the buffer for the first prefix in the prefix regular expression.
759 ///
760 /// This searches the buffer using the provided regular expression, however it
761 /// enforces constraints beyond that:
762 /// 1) The found prefix must not be a suffix of something that looks like
763 ///    a valid prefix.
764 /// 2) The found prefix must be followed by a valid check type suffix using \c
765 ///    FindCheckType above.
766 ///
767 /// The first match of the regular expression to satisfy these two is returned,
768 /// otherwise an empty StringRef is returned to indicate failure.
769 ///
770 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
771 /// start at the beginning of the returned prefix, increment \p LineNumber for
772 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
773 /// check found by examining the suffix.
774 ///
775 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
776 /// is unspecified.
777 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
778                                          unsigned &LineNumber,
779                                          Check::CheckType &CheckTy) {
780   SmallVector<StringRef, 2> Matches;
781 
782   while (!Buffer.empty()) {
783     // Find the first (longest) match using the RE.
784     if (!PrefixRE.match(Buffer, &Matches))
785       // No match at all, bail.
786       return StringRef();
787 
788     StringRef Prefix = Matches[0];
789     Matches.clear();
790 
791     assert(Prefix.data() >= Buffer.data() &&
792            Prefix.data() < Buffer.data() + Buffer.size() &&
793            "Prefix doesn't start inside of buffer!");
794     size_t Loc = Prefix.data() - Buffer.data();
795     StringRef Skipped = Buffer.substr(0, Loc);
796     Buffer = Buffer.drop_front(Loc);
797     LineNumber += Skipped.count('\n');
798 
799     // Check that the matched prefix isn't a suffix of some other check-like
800     // word.
801     // FIXME: This is a very ad-hoc check. it would be better handled in some
802     // other way. Among other things it seems hard to distinguish between
803     // intentional and unintentional uses of this feature.
804     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
805       // Now extract the type.
806       CheckTy = FindCheckType(Buffer, Prefix);
807 
808       // If we've found a valid check type for this prefix, we're done.
809       if (CheckTy != Check::CheckNone)
810         return Prefix;
811     }
812 
813     // If we didn't successfully find a prefix, we need to skip this invalid
814     // prefix and continue scanning. We directly skip the prefix that was
815     // matched and any additional parts of that check-like word.
816     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
817   }
818 
819   // We ran out of buffer while skipping partial matches so give up.
820   return StringRef();
821 }
822 
823 /// Read the check file, which specifies the sequence of expected strings.
824 ///
825 /// The strings are added to the CheckStrings vector. Returns true in case of
826 /// an error, false otherwise.
827 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
828                           std::vector<CheckString> &CheckStrings) {
829   std::vector<Pattern> ImplicitNegativeChecks;
830   for (const auto &PatternString : ImplicitCheckNot) {
831     // Create a buffer with fake command line content in order to display the
832     // command line option responsible for the specific implicit CHECK-NOT.
833     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
834     std::string Suffix = "'";
835     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
836         Prefix + PatternString + Suffix, "command line");
837 
838     StringRef PatternInBuffer =
839         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
840     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
841 
842     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
843     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
844                                                "IMPLICIT-CHECK", SM, 0);
845   }
846 
847   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
848 
849   // LineNumber keeps track of the line on which CheckPrefix instances are
850   // found.
851   unsigned LineNumber = 1;
852 
853   while (1) {
854     Check::CheckType CheckTy;
855 
856     // See if a prefix occurs in the memory buffer.
857     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
858                                                    CheckTy);
859     if (UsedPrefix.empty())
860       break;
861     assert(UsedPrefix.data() == Buffer.data() &&
862            "Failed to move Buffer's start forward, or pointed prefix outside "
863            "of the buffer!");
864 
865     // Location to use for error messages.
866     const char *UsedPrefixStart = UsedPrefix.data();
867 
868     // Skip the buffer to the end.
869     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
870 
871     // Complain about useful-looking but unsupported suffixes.
872     if (CheckTy == Check::CheckBadNot) {
873       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
874                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
875       return true;
876     }
877 
878     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
879     // leading whitespace.
880     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
881       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
882 
883     // Scan ahead to the end of line.
884     size_t EOL = Buffer.find_first_of("\n\r");
885 
886     // Remember the location of the start of the pattern, for diagnostics.
887     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
888 
889     // Parse the pattern.
890     Pattern P(CheckTy);
891     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
892       return true;
893 
894     // Verify that CHECK-LABEL lines do not define or use variables
895     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
896       SM.PrintMessage(
897           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
898           "found '" + UsedPrefix + "-LABEL:'"
899                                    " with variable definition or use");
900       return true;
901     }
902 
903     Buffer = Buffer.substr(EOL);
904 
905     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
906     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
907         CheckStrings.empty()) {
908       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
909       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
910                       SourceMgr::DK_Error,
911                       "found '" + UsedPrefix + "-" + Type +
912                           "' without previous '" + UsedPrefix + ": line");
913       return true;
914     }
915 
916     // Handle CHECK-DAG/-NOT.
917     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
918       DagNotMatches.push_back(P);
919       continue;
920     }
921 
922     // Okay, add the string we captured to the output vector and move on.
923     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
924     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
925     DagNotMatches = ImplicitNegativeChecks;
926   }
927 
928   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
929   // prefix as a filler for the error message.
930   if (!DagNotMatches.empty()) {
931     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
932                               SMLoc::getFromPointer(Buffer.data()));
933     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
934   }
935 
936   if (CheckStrings.empty()) {
937     errs() << "error: no check strings found with prefix"
938            << (CheckPrefixes.size() > 1 ? "es " : " ");
939     prefix_iterator I = CheckPrefixes.begin();
940     prefix_iterator E = CheckPrefixes.end();
941     if (I != E) {
942       errs() << "\'" << *I << ":'";
943       ++I;
944     }
945     for (; I != E; ++I)
946       errs() << ", \'" << *I << ":'";
947 
948     errs() << '\n';
949     return true;
950   }
951 
952   return false;
953 }
954 
955 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
956                              StringRef Buffer,
957                              StringMap<StringRef> &VariableTable) {
958   // Otherwise, we have an error, emit an error message.
959   SM.PrintMessage(Loc, SourceMgr::DK_Error,
960                   "expected string not found in input");
961 
962   // Print the "scanning from here" line.  If the current position is at the
963   // end of a line, advance to the start of the next line.
964   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
965 
966   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
967                   "scanning from here");
968 
969   // Allow the pattern to print additional information if desired.
970   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
971 }
972 
973 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
974                              StringRef Buffer,
975                              StringMap<StringRef> &VariableTable) {
976   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
977 }
978 
979 /// Count the number of newlines in the specified range.
980 static unsigned CountNumNewlinesBetween(StringRef Range,
981                                         const char *&FirstNewLine) {
982   unsigned NumNewLines = 0;
983   while (1) {
984     // Scan for newline.
985     Range = Range.substr(Range.find_first_of("\n\r"));
986     if (Range.empty())
987       return NumNewLines;
988 
989     ++NumNewLines;
990 
991     // Handle \n\r and \r\n as a single newline.
992     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
993         (Range[0] != Range[1]))
994       Range = Range.substr(1);
995     Range = Range.substr(1);
996 
997     if (NumNewLines == 1)
998       FirstNewLine = Range.begin();
999   }
1000 }
1001 
1002 /// Match check string and its "not strings" and/or "dag strings".
1003 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1004                           bool IsLabelScanMode, size_t &MatchLen,
1005                           StringMap<StringRef> &VariableTable) const {
1006   size_t LastPos = 0;
1007   std::vector<const Pattern *> NotStrings;
1008 
1009   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1010   // bounds; we have not processed variable definitions within the bounded block
1011   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1012   // over the block again (including the last CHECK-LABEL) in normal mode.
1013   if (!IsLabelScanMode) {
1014     // Match "dag strings" (with mixed "not strings" if any).
1015     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1016     if (LastPos == StringRef::npos)
1017       return StringRef::npos;
1018   }
1019 
1020   // Match itself from the last position after matching CHECK-DAG.
1021   StringRef MatchBuffer = Buffer.substr(LastPos);
1022   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1023   if (MatchPos == StringRef::npos) {
1024     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1025     return StringRef::npos;
1026   }
1027 
1028   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1029   // or CHECK-NOT
1030   if (!IsLabelScanMode) {
1031     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1032 
1033     // If this check is a "CHECK-NEXT", verify that the previous match was on
1034     // the previous line (i.e. that there is one newline between them).
1035     if (CheckNext(SM, SkippedRegion))
1036       return StringRef::npos;
1037 
1038     // If this check is a "CHECK-SAME", verify that the previous match was on
1039     // the same line (i.e. that there is no newline between them).
1040     if (CheckSame(SM, SkippedRegion))
1041       return StringRef::npos;
1042 
1043     // If this match had "not strings", verify that they don't exist in the
1044     // skipped region.
1045     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1046       return StringRef::npos;
1047   }
1048 
1049   return LastPos + MatchPos;
1050 }
1051 
1052 /// Verify there is a single line in the given buffer.
1053 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1054   if (Pat.getCheckTy() != Check::CheckNext)
1055     return false;
1056 
1057   // Count the number of newlines between the previous match and this one.
1058   assert(Buffer.data() !=
1059              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1060                                     SMLoc::getFromPointer(Buffer.data())))
1061                  ->getBufferStart() &&
1062          "CHECK-NEXT can't be the first check in a file");
1063 
1064   const char *FirstNewLine = nullptr;
1065   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1066 
1067   if (NumNewLines == 0) {
1068     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1069                     Prefix + "-NEXT: is on the same line as previous match");
1070     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1071                     "'next' match was here");
1072     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1073                     "previous match ended here");
1074     return true;
1075   }
1076 
1077   if (NumNewLines != 1) {
1078     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1079                     Prefix +
1080                         "-NEXT: is not on the line after the previous match");
1081     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1082                     "'next' match was here");
1083     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1084                     "previous match ended here");
1085     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1086                     "non-matching line after previous match is here");
1087     return true;
1088   }
1089 
1090   return false;
1091 }
1092 
1093 /// Verify there is no newline in the given buffer.
1094 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1095   if (Pat.getCheckTy() != Check::CheckSame)
1096     return false;
1097 
1098   // Count the number of newlines between the previous match and this one.
1099   assert(Buffer.data() !=
1100              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1101                                     SMLoc::getFromPointer(Buffer.data())))
1102                  ->getBufferStart() &&
1103          "CHECK-SAME can't be the first check in a file");
1104 
1105   const char *FirstNewLine = nullptr;
1106   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1107 
1108   if (NumNewLines != 0) {
1109     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1110                     Prefix +
1111                         "-SAME: is not on the same line as the previous match");
1112     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1113                     "'next' match was here");
1114     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1115                     "previous match ended here");
1116     return true;
1117   }
1118 
1119   return false;
1120 }
1121 
1122 /// Verify there's no "not strings" in the given buffer.
1123 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1124                            const std::vector<const Pattern *> &NotStrings,
1125                            StringMap<StringRef> &VariableTable) const {
1126   for (const Pattern *Pat : NotStrings) {
1127     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1128 
1129     size_t MatchLen = 0;
1130     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1131 
1132     if (Pos == StringRef::npos)
1133       continue;
1134 
1135     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1136                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1137     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1138                     Prefix + "-NOT: pattern specified here");
1139     return true;
1140   }
1141 
1142   return false;
1143 }
1144 
1145 /// Match "dag strings" and their mixed "not strings".
1146 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1147                              std::vector<const Pattern *> &NotStrings,
1148                              StringMap<StringRef> &VariableTable) const {
1149   if (DagNotStrings.empty())
1150     return 0;
1151 
1152   size_t LastPos = 0;
1153   size_t StartPos = LastPos;
1154 
1155   for (const Pattern &Pat : DagNotStrings) {
1156     assert((Pat.getCheckTy() == Check::CheckDAG ||
1157             Pat.getCheckTy() == Check::CheckNot) &&
1158            "Invalid CHECK-DAG or CHECK-NOT!");
1159 
1160     if (Pat.getCheckTy() == Check::CheckNot) {
1161       NotStrings.push_back(&Pat);
1162       continue;
1163     }
1164 
1165     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1166 
1167     size_t MatchLen = 0, MatchPos;
1168 
1169     // CHECK-DAG always matches from the start.
1170     StringRef MatchBuffer = Buffer.substr(StartPos);
1171     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1172     // With a group of CHECK-DAGs, a single mismatching means the match on
1173     // that group of CHECK-DAGs fails immediately.
1174     if (MatchPos == StringRef::npos) {
1175       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1176       return StringRef::npos;
1177     }
1178     // Re-calc it as the offset relative to the start of the original string.
1179     MatchPos += StartPos;
1180 
1181     if (!NotStrings.empty()) {
1182       if (MatchPos < LastPos) {
1183         // Reordered?
1184         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1185                         SourceMgr::DK_Error,
1186                         Prefix + "-DAG: found a match of CHECK-DAG"
1187                                  " reordering across a CHECK-NOT");
1188         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1189                         SourceMgr::DK_Note,
1190                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1191                                  " is found here");
1192         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1193                         Prefix + "-NOT: the crossed pattern specified"
1194                                  " here");
1195         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1196                         Prefix + "-DAG: the reordered pattern specified"
1197                                  " here");
1198         return StringRef::npos;
1199       }
1200       // All subsequent CHECK-DAGs should be matched from the farthest
1201       // position of all precedent CHECK-DAGs (including this one.)
1202       StartPos = LastPos;
1203       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1204       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1205       // region.
1206       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1207       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1208         return StringRef::npos;
1209       // Clear "not strings".
1210       NotStrings.clear();
1211     }
1212 
1213     // Update the last position with CHECK-DAG matches.
1214     LastPos = std::max(MatchPos + MatchLen, LastPos);
1215   }
1216 
1217   return LastPos;
1218 }
1219 
1220 // A check prefix must contain only alphanumeric, hyphens and underscores.
1221 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1222   Regex Validator("^[a-zA-Z0-9_-]*$");
1223   return Validator.match(CheckPrefix);
1224 }
1225 
1226 static bool ValidateCheckPrefixes() {
1227   StringSet<> PrefixSet;
1228 
1229   for (StringRef Prefix : CheckPrefixes) {
1230     // Reject empty prefixes.
1231     if (Prefix == "")
1232       return false;
1233 
1234     if (!PrefixSet.insert(Prefix).second)
1235       return false;
1236 
1237     if (!ValidateCheckPrefix(Prefix))
1238       return false;
1239   }
1240 
1241   return true;
1242 }
1243 
1244 // Combines the check prefixes into a single regex so that we can efficiently
1245 // scan for any of the set.
1246 //
1247 // The semantics are that the longest-match wins which matches our regex
1248 // library.
1249 static Regex buildCheckPrefixRegex() {
1250   // I don't think there's a way to specify an initial value for cl::list,
1251   // so if nothing was specified, add the default
1252   if (CheckPrefixes.empty())
1253     CheckPrefixes.push_back("CHECK");
1254 
1255   // We already validated the contents of CheckPrefixes so just concatenate
1256   // them as alternatives.
1257   SmallString<32> PrefixRegexStr;
1258   for (StringRef Prefix : CheckPrefixes) {
1259     if (Prefix != CheckPrefixes.front())
1260       PrefixRegexStr.push_back('|');
1261 
1262     PrefixRegexStr.append(Prefix);
1263   }
1264 
1265   return Regex(PrefixRegexStr);
1266 }
1267 
1268 static void DumpCommandLine(int argc, char **argv) {
1269   errs() << "FileCheck command line: ";
1270   for (int I = 0; I < argc; I++)
1271     errs() << " " << argv[I];
1272   errs() << "\n";
1273 }
1274 
1275 // Remove local variables from \p VariableTable. Global variables
1276 // (start with '$') are preserved.
1277 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1278   SmallVector<StringRef, 16> LocalVars;
1279   for (const auto &Var : VariableTable)
1280     if (Var.first()[0] != '$')
1281       LocalVars.push_back(Var.first());
1282 
1283   for (const auto &Var : LocalVars)
1284     VariableTable.erase(Var);
1285 }
1286 
1287 /// Check the input to FileCheck provided in the \p Buffer against the \p
1288 /// CheckStrings read from the check file.
1289 ///
1290 /// Returns false if the input fails to satisfy the checks.
1291 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1292                 ArrayRef<CheckString> CheckStrings) {
1293   bool ChecksFailed = false;
1294 
1295   /// VariableTable - This holds all the current filecheck variables.
1296   StringMap<StringRef> VariableTable;
1297 
1298   unsigned i = 0, j = 0, e = CheckStrings.size();
1299   while (true) {
1300     StringRef CheckRegion;
1301     if (j == e) {
1302       CheckRegion = Buffer;
1303     } else {
1304       const CheckString &CheckLabelStr = CheckStrings[j];
1305       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1306         ++j;
1307         continue;
1308       }
1309 
1310       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1311       size_t MatchLabelLen = 0;
1312       size_t MatchLabelPos =
1313           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1314       if (MatchLabelPos == StringRef::npos)
1315         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1316         return false;
1317 
1318       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1319       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1320       ++j;
1321     }
1322 
1323     if (EnableVarScope)
1324       ClearLocalVars(VariableTable);
1325 
1326     for (; i != j; ++i) {
1327       const CheckString &CheckStr = CheckStrings[i];
1328 
1329       // Check each string within the scanned region, including a second check
1330       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1331       size_t MatchLen = 0;
1332       size_t MatchPos =
1333           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1334 
1335       if (MatchPos == StringRef::npos) {
1336         ChecksFailed = true;
1337         i = j;
1338         break;
1339       }
1340 
1341       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1342     }
1343 
1344     if (j == e)
1345       break;
1346   }
1347 
1348   // Success if no checks failed.
1349   return !ChecksFailed;
1350 }
1351 
1352 int main(int argc, char **argv) {
1353   sys::PrintStackTraceOnErrorSignal(argv[0]);
1354   PrettyStackTraceProgram X(argc, argv);
1355   cl::ParseCommandLineOptions(argc, argv);
1356 
1357   if (!ValidateCheckPrefixes()) {
1358     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1359               "start with a letter and contain only alphanumeric characters, "
1360               "hyphens and underscores\n";
1361     return 2;
1362   }
1363 
1364   Regex PrefixRE = buildCheckPrefixRegex();
1365   std::string REError;
1366   if (!PrefixRE.isValid(REError)) {
1367     errs() << "Unable to combine check-prefix strings into a prefix regular "
1368               "expression! This is likely a bug in FileCheck's verification of "
1369               "the check-prefix strings. Regular expression parsing failed "
1370               "with the following error: "
1371            << REError << "\n";
1372     return 2;
1373   }
1374 
1375   SourceMgr SM;
1376 
1377   // Read the expected strings from the check file.
1378   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1379       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1380   if (std::error_code EC = CheckFileOrErr.getError()) {
1381     errs() << "Could not open check file '" << CheckFilename
1382            << "': " << EC.message() << '\n';
1383     return 2;
1384   }
1385   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1386 
1387   SmallString<4096> CheckFileBuffer;
1388   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1389 
1390   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1391                             CheckFileText, CheckFile.getBufferIdentifier()),
1392                         SMLoc());
1393 
1394   std::vector<CheckString> CheckStrings;
1395   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1396     return 2;
1397 
1398   // Open the file to check and add it to SourceMgr.
1399   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1400       MemoryBuffer::getFileOrSTDIN(InputFilename);
1401   if (std::error_code EC = InputFileOrErr.getError()) {
1402     errs() << "Could not open input file '" << InputFilename
1403            << "': " << EC.message() << '\n';
1404     return 2;
1405   }
1406   MemoryBuffer &InputFile = *InputFileOrErr.get();
1407 
1408   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1409     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1410     DumpCommandLine(argc, argv);
1411     return 2;
1412   }
1413 
1414   SmallString<4096> InputFileBuffer;
1415   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1416 
1417   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1418                             InputFileText, InputFile.getBufferIdentifier()),
1419                         SMLoc());
1420 
1421   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1422 }
1423