xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision edf338856cc6e542e515d7537090ebbbccbdce9f)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an exit status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/InitLLVM.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cctype>
31 #include <list>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43                   cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string> CheckPrefixes(
46     "check-prefix",
47     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool> NoCanonicalizeWhiteSpace(
55     "strict-whitespace",
56     cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::list<std::string> GlobalDefines("D", cl::Prefix,
66     cl::desc("Define a variable to be used in capture patterns."),
67     cl::value_desc("VAR=VALUE"));
68 
69 static cl::opt<bool> AllowEmptyInput(
70     "allow-empty", cl::init(false),
71     cl::desc("Allow the input file to be empty. This is useful when making\n"
72              "checks that some error message does not occur, for example."));
73 
74 static cl::opt<bool> MatchFullLines(
75     "match-full-lines", cl::init(false),
76     cl::desc("Require all positive matches to cover an entire input line.\n"
77              "Allows leading and trailing whitespace if --strict-whitespace\n"
78              "is not also passed."));
79 
80 static cl::opt<bool> EnableVarScope(
81     "enable-var-scope", cl::init(false),
82     cl::desc("Enables scope for regex variables. Variables with names that\n"
83              "do not start with '$' will be reset at the beginning of\n"
84              "each CHECK-LABEL block."));
85 
86 static cl::opt<bool> AllowDeprecatedDagOverlap(
87     "allow-deprecated-dag-overlap", cl::init(false),
88     cl::desc("Enable overlapping among matches in a group of consecutive\n"
89              "CHECK-DAG directives.  This option is deprecated and is only\n"
90              "provided for convenience as old tests are migrated to the new\n"
91              "non-overlapping CHECK-DAG implementation.\n"));
92 
93 typedef cl::list<std::string>::const_iterator prefix_iterator;
94 
95 //===----------------------------------------------------------------------===//
96 // Pattern Handling Code.
97 //===----------------------------------------------------------------------===//
98 
99 namespace Check {
100 enum CheckType {
101   CheckNone = 0,
102   CheckPlain,
103   CheckNext,
104   CheckSame,
105   CheckNot,
106   CheckDAG,
107   CheckLabel,
108   CheckEmpty,
109 
110   /// Indicates the pattern only matches the end of file. This is used for
111   /// trailing CHECK-NOTs.
112   CheckEOF,
113 
114   /// Marks when parsing found a -NOT check combined with another CHECK suffix.
115   CheckBadNot
116 };
117 }
118 
119 class Pattern {
120   SMLoc PatternLoc;
121 
122   /// A fixed string to match as the pattern or empty if this pattern requires
123   /// a regex match.
124   StringRef FixedStr;
125 
126   /// A regex string to match as the pattern or empty if this pattern requires
127   /// a fixed string to match.
128   std::string RegExStr;
129 
130   /// Entries in this vector map to uses of a variable in the pattern, e.g.
131   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
132   /// we'll get an entry in this vector that tells us to insert the value of
133   /// bar at offset 3.
134   std::vector<std::pair<StringRef, unsigned>> VariableUses;
135 
136   /// Maps definitions of variables to their parenthesized capture numbers.
137   ///
138   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
139   /// 1.
140   std::map<StringRef, unsigned> VariableDefs;
141 
142   Check::CheckType CheckTy;
143 
144   /// Contains the number of line this pattern is in.
145   unsigned LineNumber;
146 
147 public:
148   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
149 
150   /// Returns the location in source code.
151   SMLoc getLoc() const { return PatternLoc; }
152 
153   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
154                     unsigned LineNumber);
155   size_t Match(StringRef Buffer, size_t &MatchLen,
156                StringMap<StringRef> &VariableTable) const;
157   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
158                         const StringMap<StringRef> &VariableTable) const;
159 
160   bool hasVariable() const {
161     return !(VariableUses.empty() && VariableDefs.empty());
162   }
163 
164   Check::CheckType getCheckTy() const { return CheckTy; }
165 
166 private:
167   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
168   void AddBackrefToRegEx(unsigned BackrefNum);
169   unsigned
170   ComputeMatchDistance(StringRef Buffer,
171                        const StringMap<StringRef> &VariableTable) const;
172   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
173   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
174 };
175 
176 /// Parses the given string into the Pattern.
177 ///
178 /// \p Prefix provides which prefix is being matched, \p SM provides the
179 /// SourceMgr used for error reports, and \p LineNumber is the line number in
180 /// the input file from which the pattern string was read. Returns true in
181 /// case of an error, false otherwise.
182 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
183                            SourceMgr &SM, unsigned LineNumber) {
184   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
185 
186   this->LineNumber = LineNumber;
187   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
188 
189   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
190     // Ignore trailing whitespace.
191     while (!PatternStr.empty() &&
192            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
193       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
194 
195   // Check that there is something on the line.
196   if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
197     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
198                     "found empty check string with prefix '" + Prefix + ":'");
199     return true;
200   }
201 
202   if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
203     SM.PrintMessage(
204         PatternLoc, SourceMgr::DK_Error,
205         "found non-empty check string for empty check with prefix '" + Prefix +
206             ":'");
207     return true;
208   }
209 
210   if (CheckTy == Check::CheckEmpty) {
211     RegExStr = "(\n$)";
212     return false;
213   }
214 
215   // Check to see if this is a fixed string, or if it has regex pieces.
216   if (!MatchFullLinesHere &&
217       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
218                                  PatternStr.find("[[") == StringRef::npos))) {
219     FixedStr = PatternStr;
220     return false;
221   }
222 
223   if (MatchFullLinesHere) {
224     RegExStr += '^';
225     if (!NoCanonicalizeWhiteSpace)
226       RegExStr += " *";
227   }
228 
229   // Paren value #0 is for the fully matched string.  Any new parenthesized
230   // values add from there.
231   unsigned CurParen = 1;
232 
233   // Otherwise, there is at least one regex piece.  Build up the regex pattern
234   // by escaping scary characters in fixed strings, building up one big regex.
235   while (!PatternStr.empty()) {
236     // RegEx matches.
237     if (PatternStr.startswith("{{")) {
238       // This is the start of a regex match.  Scan for the }}.
239       size_t End = PatternStr.find("}}");
240       if (End == StringRef::npos) {
241         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
242                         SourceMgr::DK_Error,
243                         "found start of regex string with no end '}}'");
244         return true;
245       }
246 
247       // Enclose {{}} patterns in parens just like [[]] even though we're not
248       // capturing the result for any purpose.  This is required in case the
249       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
250       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
251       RegExStr += '(';
252       ++CurParen;
253 
254       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
255         return true;
256       RegExStr += ')';
257 
258       PatternStr = PatternStr.substr(End + 2);
259       continue;
260     }
261 
262     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
263     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
264     // second form is [[foo]] which is a reference to foo.  The variable name
265     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
266     // it.  This is to catch some common errors.
267     if (PatternStr.startswith("[[")) {
268       // Find the closing bracket pair ending the match.  End is going to be an
269       // offset relative to the beginning of the match string.
270       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
271 
272       if (End == StringRef::npos) {
273         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
274                         SourceMgr::DK_Error,
275                         "invalid named regex reference, no ]] found");
276         return true;
277       }
278 
279       StringRef MatchStr = PatternStr.substr(2, End);
280       PatternStr = PatternStr.substr(End + 4);
281 
282       // Get the regex name (e.g. "foo").
283       size_t NameEnd = MatchStr.find(':');
284       StringRef Name = MatchStr.substr(0, NameEnd);
285 
286       if (Name.empty()) {
287         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
288                         "invalid name in named regex: empty name");
289         return true;
290       }
291 
292       // Verify that the name/expression is well formed. FileCheck currently
293       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
294       // is relaxed, more strict check is performed in \c EvaluateExpression.
295       bool IsExpression = false;
296       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
297         if (i == 0) {
298           if (Name[i] == '$')  // Global vars start with '$'
299             continue;
300           if (Name[i] == '@') {
301             if (NameEnd != StringRef::npos) {
302               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
303                               SourceMgr::DK_Error,
304                               "invalid name in named regex definition");
305               return true;
306             }
307             IsExpression = true;
308             continue;
309           }
310         }
311         if (Name[i] != '_' && !isalnum(Name[i]) &&
312             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
313           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
314                           SourceMgr::DK_Error, "invalid name in named regex");
315           return true;
316         }
317       }
318 
319       // Name can't start with a digit.
320       if (isdigit(static_cast<unsigned char>(Name[0]))) {
321         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
322                         "invalid name in named regex");
323         return true;
324       }
325 
326       // Handle [[foo]].
327       if (NameEnd == StringRef::npos) {
328         // Handle variables that were defined earlier on the same line by
329         // emitting a backreference.
330         if (VariableDefs.find(Name) != VariableDefs.end()) {
331           unsigned VarParenNum = VariableDefs[Name];
332           if (VarParenNum < 1 || VarParenNum > 9) {
333             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
334                             SourceMgr::DK_Error,
335                             "Can't back-reference more than 9 variables");
336             return true;
337           }
338           AddBackrefToRegEx(VarParenNum);
339         } else {
340           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
341         }
342         continue;
343       }
344 
345       // Handle [[foo:.*]].
346       VariableDefs[Name] = CurParen;
347       RegExStr += '(';
348       ++CurParen;
349 
350       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
351         return true;
352 
353       RegExStr += ')';
354     }
355 
356     // Handle fixed string matches.
357     // Find the end, which is the start of the next regex.
358     size_t FixedMatchEnd = PatternStr.find("{{");
359     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
360     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
361     PatternStr = PatternStr.substr(FixedMatchEnd);
362   }
363 
364   if (MatchFullLinesHere) {
365     if (!NoCanonicalizeWhiteSpace)
366       RegExStr += " *";
367     RegExStr += '$';
368   }
369 
370   return false;
371 }
372 
373 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
374   Regex R(RS);
375   std::string Error;
376   if (!R.isValid(Error)) {
377     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
378                     "invalid regex: " + Error);
379     return true;
380   }
381 
382   RegExStr += RS.str();
383   CurParen += R.getNumMatches();
384   return false;
385 }
386 
387 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
388   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
389   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
390   RegExStr += Backref;
391 }
392 
393 /// Evaluates expression and stores the result to \p Value.
394 ///
395 /// Returns true on success and false when the expression has invalid syntax.
396 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
397   // The only supported expression is @LINE([\+-]\d+)?
398   if (!Expr.startswith("@LINE"))
399     return false;
400   Expr = Expr.substr(StringRef("@LINE").size());
401   int Offset = 0;
402   if (!Expr.empty()) {
403     if (Expr[0] == '+')
404       Expr = Expr.substr(1);
405     else if (Expr[0] != '-')
406       return false;
407     if (Expr.getAsInteger(10, Offset))
408       return false;
409   }
410   Value = llvm::itostr(LineNumber + Offset);
411   return true;
412 }
413 
414 /// Matches the pattern string against the input buffer \p Buffer
415 ///
416 /// This returns the position that is matched or npos if there is no match. If
417 /// there is a match, the size of the matched string is returned in \p
418 /// MatchLen.
419 ///
420 /// The \p VariableTable StringMap provides the current values of filecheck
421 /// variables and is updated if this match defines new values.
422 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
423                       StringMap<StringRef> &VariableTable) const {
424   // If this is the EOF pattern, match it immediately.
425   if (CheckTy == Check::CheckEOF) {
426     MatchLen = 0;
427     return Buffer.size();
428   }
429 
430   // If this is a fixed string pattern, just match it now.
431   if (!FixedStr.empty()) {
432     MatchLen = FixedStr.size();
433     return Buffer.find(FixedStr);
434   }
435 
436   // Regex match.
437 
438   // If there are variable uses, we need to create a temporary string with the
439   // actual value.
440   StringRef RegExToMatch = RegExStr;
441   std::string TmpStr;
442   if (!VariableUses.empty()) {
443     TmpStr = RegExStr;
444 
445     unsigned InsertOffset = 0;
446     for (const auto &VariableUse : VariableUses) {
447       std::string Value;
448 
449       if (VariableUse.first[0] == '@') {
450         if (!EvaluateExpression(VariableUse.first, Value))
451           return StringRef::npos;
452       } else {
453         StringMap<StringRef>::iterator it =
454             VariableTable.find(VariableUse.first);
455         // If the variable is undefined, return an error.
456         if (it == VariableTable.end())
457           return StringRef::npos;
458 
459         // Look up the value and escape it so that we can put it into the regex.
460         Value += Regex::escape(it->second);
461       }
462 
463       // Plop it into the regex at the adjusted offset.
464       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
465                     Value.begin(), Value.end());
466       InsertOffset += Value.size();
467     }
468 
469     // Match the newly constructed regex.
470     RegExToMatch = TmpStr;
471   }
472 
473   SmallVector<StringRef, 4> MatchInfo;
474   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
475     return StringRef::npos;
476 
477   // Successful regex match.
478   assert(!MatchInfo.empty() && "Didn't get any match");
479   StringRef FullMatch = MatchInfo[0];
480 
481   // If this defines any variables, remember their values.
482   for (const auto &VariableDef : VariableDefs) {
483     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
484     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
485   }
486 
487   MatchLen = FullMatch.size();
488   return FullMatch.data() - Buffer.data();
489 }
490 
491 
492 /// Computes an arbitrary estimate for the quality of matching this pattern at
493 /// the start of \p Buffer; a distance of zero should correspond to a perfect
494 /// match.
495 unsigned
496 Pattern::ComputeMatchDistance(StringRef Buffer,
497                               const StringMap<StringRef> &VariableTable) const {
498   // Just compute the number of matching characters. For regular expressions, we
499   // just compare against the regex itself and hope for the best.
500   //
501   // FIXME: One easy improvement here is have the regex lib generate a single
502   // example regular expression which matches, and use that as the example
503   // string.
504   StringRef ExampleString(FixedStr);
505   if (ExampleString.empty())
506     ExampleString = RegExStr;
507 
508   // Only compare up to the first line in the buffer, or the string size.
509   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
510   BufferPrefix = BufferPrefix.split('\n').first;
511   return BufferPrefix.edit_distance(ExampleString);
512 }
513 
514 /// Prints additional information about a failure to match involving this
515 /// pattern.
516 void Pattern::PrintFailureInfo(
517     const SourceMgr &SM, StringRef Buffer,
518     const StringMap<StringRef> &VariableTable) const {
519   // If this was a regular expression using variables, print the current
520   // variable values.
521   if (!VariableUses.empty()) {
522     for (const auto &VariableUse : VariableUses) {
523       SmallString<256> Msg;
524       raw_svector_ostream OS(Msg);
525       StringRef Var = VariableUse.first;
526       if (Var[0] == '@') {
527         std::string Value;
528         if (EvaluateExpression(Var, Value)) {
529           OS << "with expression \"";
530           OS.write_escaped(Var) << "\" equal to \"";
531           OS.write_escaped(Value) << "\"";
532         } else {
533           OS << "uses incorrect expression \"";
534           OS.write_escaped(Var) << "\"";
535         }
536       } else {
537         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
538 
539         // Check for undefined variable references.
540         if (it == VariableTable.end()) {
541           OS << "uses undefined variable \"";
542           OS.write_escaped(Var) << "\"";
543         } else {
544           OS << "with variable \"";
545           OS.write_escaped(Var) << "\" equal to \"";
546           OS.write_escaped(it->second) << "\"";
547         }
548       }
549 
550       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
551                       OS.str());
552     }
553   }
554 
555   // Attempt to find the closest/best fuzzy match.  Usually an error happens
556   // because some string in the output didn't exactly match. In these cases, we
557   // would like to show the user a best guess at what "should have" matched, to
558   // save them having to actually check the input manually.
559   size_t NumLinesForward = 0;
560   size_t Best = StringRef::npos;
561   double BestQuality = 0;
562 
563   // Use an arbitrary 4k limit on how far we will search.
564   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
565     if (Buffer[i] == '\n')
566       ++NumLinesForward;
567 
568     // Patterns have leading whitespace stripped, so skip whitespace when
569     // looking for something which looks like a pattern.
570     if (Buffer[i] == ' ' || Buffer[i] == '\t')
571       continue;
572 
573     // Compute the "quality" of this match as an arbitrary combination of the
574     // match distance and the number of lines skipped to get to this match.
575     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
576     double Quality = Distance + (NumLinesForward / 100.);
577 
578     if (Quality < BestQuality || Best == StringRef::npos) {
579       Best = i;
580       BestQuality = Quality;
581     }
582   }
583 
584   // Print the "possible intended match here" line if we found something
585   // reasonable and not equal to what we showed in the "scanning from here"
586   // line.
587   if (Best && Best != StringRef::npos && BestQuality < 50) {
588     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
589                     SourceMgr::DK_Note, "possible intended match here");
590 
591     // FIXME: If we wanted to be really friendly we would show why the match
592     // failed, as it can be hard to spot simple one character differences.
593   }
594 }
595 
596 /// Finds the closing sequence of a regex variable usage or definition.
597 ///
598 /// \p Str has to point in the beginning of the definition (right after the
599 /// opening sequence). Returns the offset of the closing sequence within Str,
600 /// or npos if it was not found.
601 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
602   // Offset keeps track of the current offset within the input Str
603   size_t Offset = 0;
604   // [...] Nesting depth
605   size_t BracketDepth = 0;
606 
607   while (!Str.empty()) {
608     if (Str.startswith("]]") && BracketDepth == 0)
609       return Offset;
610     if (Str[0] == '\\') {
611       // Backslash escapes the next char within regexes, so skip them both.
612       Str = Str.substr(2);
613       Offset += 2;
614     } else {
615       switch (Str[0]) {
616       default:
617         break;
618       case '[':
619         BracketDepth++;
620         break;
621       case ']':
622         if (BracketDepth == 0) {
623           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
624                           SourceMgr::DK_Error,
625                           "missing closing \"]\" for regex variable");
626           exit(1);
627         }
628         BracketDepth--;
629         break;
630       }
631       Str = Str.substr(1);
632       Offset++;
633     }
634   }
635 
636   return StringRef::npos;
637 }
638 
639 //===----------------------------------------------------------------------===//
640 // Check Strings.
641 //===----------------------------------------------------------------------===//
642 
643 /// A check that we found in the input file.
644 struct CheckString {
645   /// The pattern to match.
646   Pattern Pat;
647 
648   /// Which prefix name this check matched.
649   StringRef Prefix;
650 
651   /// The location in the match file that the check string was specified.
652   SMLoc Loc;
653 
654   /// All of the strings that are disallowed from occurring between this match
655   /// string and the previous one (or start of file).
656   std::vector<Pattern> DagNotStrings;
657 
658   CheckString(const Pattern &P, StringRef S, SMLoc L)
659       : Pat(P), Prefix(S), Loc(L) {}
660 
661   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
662                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
663 
664   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
665   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
666   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
667                 const std::vector<const Pattern *> &NotStrings,
668                 StringMap<StringRef> &VariableTable) const;
669   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
670                   std::vector<const Pattern *> &NotStrings,
671                   StringMap<StringRef> &VariableTable) const;
672 };
673 
674 /// Canonicalize whitespaces in the file. Line endings are replaced with
675 /// UNIX-style '\n'.
676 static StringRef CanonicalizeFile(MemoryBuffer &MB,
677                                   SmallVectorImpl<char> &OutputBuffer) {
678   OutputBuffer.reserve(MB.getBufferSize());
679 
680   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
681        Ptr != End; ++Ptr) {
682     // Eliminate trailing dosish \r.
683     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
684       continue;
685     }
686 
687     // If current char is not a horizontal whitespace or if horizontal
688     // whitespace canonicalization is disabled, dump it to output as is.
689     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
690       OutputBuffer.push_back(*Ptr);
691       continue;
692     }
693 
694     // Otherwise, add one space and advance over neighboring space.
695     OutputBuffer.push_back(' ');
696     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
697       ++Ptr;
698   }
699 
700   // Add a null byte and then return all but that byte.
701   OutputBuffer.push_back('\0');
702   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
703 }
704 
705 static bool IsPartOfWord(char c) {
706   return (isalnum(c) || c == '-' || c == '_');
707 }
708 
709 // Get the size of the prefix extension.
710 static size_t CheckTypeSize(Check::CheckType Ty) {
711   switch (Ty) {
712   case Check::CheckNone:
713   case Check::CheckBadNot:
714     return 0;
715 
716   case Check::CheckPlain:
717     return sizeof(":") - 1;
718 
719   case Check::CheckNext:
720     return sizeof("-NEXT:") - 1;
721 
722   case Check::CheckSame:
723     return sizeof("-SAME:") - 1;
724 
725   case Check::CheckNot:
726     return sizeof("-NOT:") - 1;
727 
728   case Check::CheckDAG:
729     return sizeof("-DAG:") - 1;
730 
731   case Check::CheckLabel:
732     return sizeof("-LABEL:") - 1;
733 
734   case Check::CheckEmpty:
735     return sizeof("-EMPTY:") - 1;
736 
737   case Check::CheckEOF:
738     llvm_unreachable("Should not be using EOF size");
739   }
740 
741   llvm_unreachable("Bad check type");
742 }
743 
744 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
745   if (Buffer.size() <= Prefix.size())
746     return Check::CheckNone;
747 
748   char NextChar = Buffer[Prefix.size()];
749 
750   // Verify that the : is present after the prefix.
751   if (NextChar == ':')
752     return Check::CheckPlain;
753 
754   if (NextChar != '-')
755     return Check::CheckNone;
756 
757   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
758   if (Rest.startswith("NEXT:"))
759     return Check::CheckNext;
760 
761   if (Rest.startswith("SAME:"))
762     return Check::CheckSame;
763 
764   if (Rest.startswith("NOT:"))
765     return Check::CheckNot;
766 
767   if (Rest.startswith("DAG:"))
768     return Check::CheckDAG;
769 
770   if (Rest.startswith("LABEL:"))
771     return Check::CheckLabel;
772 
773   if (Rest.startswith("EMPTY:"))
774     return Check::CheckEmpty;
775 
776   // You can't combine -NOT with another suffix.
777   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
778       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
779       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
780       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
781     return Check::CheckBadNot;
782 
783   return Check::CheckNone;
784 }
785 
786 // From the given position, find the next character after the word.
787 static size_t SkipWord(StringRef Str, size_t Loc) {
788   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
789     ++Loc;
790   return Loc;
791 }
792 
793 /// Search the buffer for the first prefix in the prefix regular expression.
794 ///
795 /// This searches the buffer using the provided regular expression, however it
796 /// enforces constraints beyond that:
797 /// 1) The found prefix must not be a suffix of something that looks like
798 ///    a valid prefix.
799 /// 2) The found prefix must be followed by a valid check type suffix using \c
800 ///    FindCheckType above.
801 ///
802 /// The first match of the regular expression to satisfy these two is returned,
803 /// otherwise an empty StringRef is returned to indicate failure.
804 ///
805 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
806 /// start at the beginning of the returned prefix, increment \p LineNumber for
807 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
808 /// check found by examining the suffix.
809 ///
810 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
811 /// is unspecified.
812 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
813                                          unsigned &LineNumber,
814                                          Check::CheckType &CheckTy) {
815   SmallVector<StringRef, 2> Matches;
816 
817   while (!Buffer.empty()) {
818     // Find the first (longest) match using the RE.
819     if (!PrefixRE.match(Buffer, &Matches))
820       // No match at all, bail.
821       return StringRef();
822 
823     StringRef Prefix = Matches[0];
824     Matches.clear();
825 
826     assert(Prefix.data() >= Buffer.data() &&
827            Prefix.data() < Buffer.data() + Buffer.size() &&
828            "Prefix doesn't start inside of buffer!");
829     size_t Loc = Prefix.data() - Buffer.data();
830     StringRef Skipped = Buffer.substr(0, Loc);
831     Buffer = Buffer.drop_front(Loc);
832     LineNumber += Skipped.count('\n');
833 
834     // Check that the matched prefix isn't a suffix of some other check-like
835     // word.
836     // FIXME: This is a very ad-hoc check. it would be better handled in some
837     // other way. Among other things it seems hard to distinguish between
838     // intentional and unintentional uses of this feature.
839     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
840       // Now extract the type.
841       CheckTy = FindCheckType(Buffer, Prefix);
842 
843       // If we've found a valid check type for this prefix, we're done.
844       if (CheckTy != Check::CheckNone)
845         return Prefix;
846     }
847 
848     // If we didn't successfully find a prefix, we need to skip this invalid
849     // prefix and continue scanning. We directly skip the prefix that was
850     // matched and any additional parts of that check-like word.
851     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
852   }
853 
854   // We ran out of buffer while skipping partial matches so give up.
855   return StringRef();
856 }
857 
858 /// Read the check file, which specifies the sequence of expected strings.
859 ///
860 /// The strings are added to the CheckStrings vector. Returns true in case of
861 /// an error, false otherwise.
862 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
863                           std::vector<CheckString> &CheckStrings) {
864   std::vector<Pattern> ImplicitNegativeChecks;
865   for (const auto &PatternString : ImplicitCheckNot) {
866     // Create a buffer with fake command line content in order to display the
867     // command line option responsible for the specific implicit CHECK-NOT.
868     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
869     std::string Suffix = "'";
870     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
871         Prefix + PatternString + Suffix, "command line");
872 
873     StringRef PatternInBuffer =
874         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
875     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
876 
877     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
878     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
879                                                "IMPLICIT-CHECK", SM, 0);
880   }
881 
882   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
883 
884   // LineNumber keeps track of the line on which CheckPrefix instances are
885   // found.
886   unsigned LineNumber = 1;
887 
888   while (1) {
889     Check::CheckType CheckTy;
890 
891     // See if a prefix occurs in the memory buffer.
892     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
893                                                    CheckTy);
894     if (UsedPrefix.empty())
895       break;
896     assert(UsedPrefix.data() == Buffer.data() &&
897            "Failed to move Buffer's start forward, or pointed prefix outside "
898            "of the buffer!");
899 
900     // Location to use for error messages.
901     const char *UsedPrefixStart = UsedPrefix.data();
902 
903     // Skip the buffer to the end.
904     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
905 
906     // Complain about useful-looking but unsupported suffixes.
907     if (CheckTy == Check::CheckBadNot) {
908       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
909                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
910       return true;
911     }
912 
913     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
914     // leading whitespace.
915     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
916       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
917 
918     // Scan ahead to the end of line.
919     size_t EOL = Buffer.find_first_of("\n\r");
920 
921     // Remember the location of the start of the pattern, for diagnostics.
922     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
923 
924     // Parse the pattern.
925     Pattern P(CheckTy);
926     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
927       return true;
928 
929     // Verify that CHECK-LABEL lines do not define or use variables
930     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
931       SM.PrintMessage(
932           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
933           "found '" + UsedPrefix + "-LABEL:'"
934                                    " with variable definition or use");
935       return true;
936     }
937 
938     Buffer = Buffer.substr(EOL);
939 
940     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
941     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
942          CheckTy == Check::CheckEmpty) &&
943         CheckStrings.empty()) {
944       StringRef Type = CheckTy == Check::CheckNext
945                            ? "NEXT"
946                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
947       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
948                       SourceMgr::DK_Error,
949                       "found '" + UsedPrefix + "-" + Type +
950                           "' without previous '" + UsedPrefix + ": line");
951       return true;
952     }
953 
954     // Handle CHECK-DAG/-NOT.
955     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
956       DagNotMatches.push_back(P);
957       continue;
958     }
959 
960     // Okay, add the string we captured to the output vector and move on.
961     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
962     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
963     DagNotMatches = ImplicitNegativeChecks;
964   }
965 
966   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
967   // prefix as a filler for the error message.
968   if (!DagNotMatches.empty()) {
969     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
970                               SMLoc::getFromPointer(Buffer.data()));
971     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
972   }
973 
974   if (CheckStrings.empty()) {
975     errs() << "error: no check strings found with prefix"
976            << (CheckPrefixes.size() > 1 ? "es " : " ");
977     prefix_iterator I = CheckPrefixes.begin();
978     prefix_iterator E = CheckPrefixes.end();
979     if (I != E) {
980       errs() << "\'" << *I << ":'";
981       ++I;
982     }
983     for (; I != E; ++I)
984       errs() << ", \'" << *I << ":'";
985 
986     errs() << '\n';
987     return true;
988   }
989 
990   return false;
991 }
992 
993 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
994                              StringRef Buffer,
995                              StringMap<StringRef> &VariableTable) {
996   // Otherwise, we have an error, emit an error message.
997   SM.PrintMessage(Loc, SourceMgr::DK_Error,
998                   "expected string not found in input");
999 
1000   // Print the "scanning from here" line.  If the current position is at the
1001   // end of a line, advance to the start of the next line.
1002   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1003 
1004   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1005                   "scanning from here");
1006 
1007   // Allow the pattern to print additional information if desired.
1008   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1009 }
1010 
1011 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1012                              StringRef Buffer,
1013                              StringMap<StringRef> &VariableTable) {
1014   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1015 }
1016 
1017 /// Count the number of newlines in the specified range.
1018 static unsigned CountNumNewlinesBetween(StringRef Range,
1019                                         const char *&FirstNewLine) {
1020   unsigned NumNewLines = 0;
1021   while (1) {
1022     // Scan for newline.
1023     Range = Range.substr(Range.find_first_of("\n\r"));
1024     if (Range.empty())
1025       return NumNewLines;
1026 
1027     ++NumNewLines;
1028 
1029     // Handle \n\r and \r\n as a single newline.
1030     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1031         (Range[0] != Range[1]))
1032       Range = Range.substr(1);
1033     Range = Range.substr(1);
1034 
1035     if (NumNewLines == 1)
1036       FirstNewLine = Range.begin();
1037   }
1038 }
1039 
1040 /// Match check string and its "not strings" and/or "dag strings".
1041 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1042                           bool IsLabelScanMode, size_t &MatchLen,
1043                           StringMap<StringRef> &VariableTable) const {
1044   size_t LastPos = 0;
1045   std::vector<const Pattern *> NotStrings;
1046 
1047   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1048   // bounds; we have not processed variable definitions within the bounded block
1049   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1050   // over the block again (including the last CHECK-LABEL) in normal mode.
1051   if (!IsLabelScanMode) {
1052     // Match "dag strings" (with mixed "not strings" if any).
1053     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1054     if (LastPos == StringRef::npos)
1055       return StringRef::npos;
1056   }
1057 
1058   // Match itself from the last position after matching CHECK-DAG.
1059   StringRef MatchBuffer = Buffer.substr(LastPos);
1060   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1061   if (MatchPos == StringRef::npos) {
1062     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1063     return StringRef::npos;
1064   }
1065 
1066   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1067   // or CHECK-NOT
1068   if (!IsLabelScanMode) {
1069     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1070 
1071     // If this check is a "CHECK-NEXT", verify that the previous match was on
1072     // the previous line (i.e. that there is one newline between them).
1073     if (CheckNext(SM, SkippedRegion))
1074       return StringRef::npos;
1075 
1076     // If this check is a "CHECK-SAME", verify that the previous match was on
1077     // the same line (i.e. that there is no newline between them).
1078     if (CheckSame(SM, SkippedRegion))
1079       return StringRef::npos;
1080 
1081     // If this match had "not strings", verify that they don't exist in the
1082     // skipped region.
1083     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1084       return StringRef::npos;
1085   }
1086 
1087   return LastPos + MatchPos;
1088 }
1089 
1090 /// Verify there is a single line in the given buffer.
1091 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1092   if (Pat.getCheckTy() != Check::CheckNext &&
1093       Pat.getCheckTy() != Check::CheckEmpty)
1094     return false;
1095 
1096   Twine CheckName =
1097       Prefix +
1098       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
1099 
1100   // Count the number of newlines between the previous match and this one.
1101   assert(Buffer.data() !=
1102              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1103                                     SMLoc::getFromPointer(Buffer.data())))
1104                  ->getBufferStart() &&
1105          "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
1106 
1107   const char *FirstNewLine = nullptr;
1108   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1109 
1110   // For CHECK-EMPTY, the preceding new line is consumed by the pattern, so
1111   // this needs to be re-added.
1112   if (Pat.getCheckTy() == Check::CheckEmpty)
1113     ++NumNewLines;
1114 
1115   if (NumNewLines == 0) {
1116     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1117                     CheckName + ": is on the same line as previous match");
1118     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1119                     "'next' match was here");
1120     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1121                     "previous match ended here");
1122     return true;
1123   }
1124 
1125   if (NumNewLines != 1) {
1126     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1127                     CheckName +
1128                         ": is not on the line after the previous match");
1129     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1130                     "'next' match was here");
1131     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1132                     "previous match ended here");
1133     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1134                     "non-matching line after previous match is here");
1135     return true;
1136   }
1137 
1138   return false;
1139 }
1140 
1141 /// Verify there is no newline in the given buffer.
1142 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1143   if (Pat.getCheckTy() != Check::CheckSame)
1144     return false;
1145 
1146   // Count the number of newlines between the previous match and this one.
1147   assert(Buffer.data() !=
1148              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1149                                     SMLoc::getFromPointer(Buffer.data())))
1150                  ->getBufferStart() &&
1151          "CHECK-SAME can't be the first check in a file");
1152 
1153   const char *FirstNewLine = nullptr;
1154   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1155 
1156   if (NumNewLines != 0) {
1157     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1158                     Prefix +
1159                         "-SAME: is not on the same line as the previous match");
1160     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1161                     "'next' match was here");
1162     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1163                     "previous match ended here");
1164     return true;
1165   }
1166 
1167   return false;
1168 }
1169 
1170 /// Verify there's no "not strings" in the given buffer.
1171 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1172                            const std::vector<const Pattern *> &NotStrings,
1173                            StringMap<StringRef> &VariableTable) const {
1174   for (const Pattern *Pat : NotStrings) {
1175     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1176 
1177     size_t MatchLen = 0;
1178     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1179 
1180     if (Pos == StringRef::npos)
1181       continue;
1182 
1183     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1184                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1185     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1186                     Prefix + "-NOT: pattern specified here");
1187     return true;
1188   }
1189 
1190   return false;
1191 }
1192 
1193 /// Match "dag strings" and their mixed "not strings".
1194 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1195                              std::vector<const Pattern *> &NotStrings,
1196                              StringMap<StringRef> &VariableTable) const {
1197   if (DagNotStrings.empty())
1198     return 0;
1199 
1200   size_t LastPos = 0;
1201   size_t StartPos = LastPos;
1202 
1203   // A sorted list of ranges for non-overlapping dag matches.
1204   struct Match {
1205     size_t Pos;
1206     size_t End;
1207   };
1208   std::list<Match> Matches;
1209 
1210   for (const Pattern &Pat : DagNotStrings) {
1211     assert((Pat.getCheckTy() == Check::CheckDAG ||
1212             Pat.getCheckTy() == Check::CheckNot) &&
1213            "Invalid CHECK-DAG or CHECK-NOT!");
1214 
1215     if (Pat.getCheckTy() == Check::CheckNot) {
1216       NotStrings.push_back(&Pat);
1217       continue;
1218     }
1219 
1220     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1221 
1222     // CHECK-DAG always matches from the start.
1223     size_t MatchLen = 0, MatchPos = StartPos;
1224 
1225     // Search for a match that doesn't overlap a previous match in this
1226     // CHECK-DAG group.
1227     for (auto MI = Matches.begin(), ME = Matches.end(); true; ++MI) {
1228       StringRef MatchBuffer = Buffer.substr(MatchPos);
1229       size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1230       // With a group of CHECK-DAGs, a single mismatching means the match on
1231       // that group of CHECK-DAGs fails immediately.
1232       if (MatchPosBuf == StringRef::npos) {
1233         PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1234         return StringRef::npos;
1235       }
1236       // Re-calc it as the offset relative to the start of the original string.
1237       MatchPos += MatchPosBuf;
1238       if (AllowDeprecatedDagOverlap)
1239         break;
1240       // Iterate previous matches until overlapping match or insertion point.
1241       Match M{MatchPos, MatchPos + MatchLen};
1242       bool Overlap = false;
1243       for (; MI != ME; ++MI) {
1244         if (M.Pos < MI->End) {
1245           // !Overlap => New match has no overlap and is before this old match.
1246           // Overlap => New match overlaps this old match.
1247           Overlap = MI->Pos < M.End;
1248           break;
1249         }
1250       }
1251       if (!Overlap) {
1252         // Insert non-overlapping match into list.
1253         Matches.insert(MI, M);
1254         break;
1255       }
1256       MatchPos = MI->End;
1257     }
1258 
1259     if (!NotStrings.empty()) {
1260       if (MatchPos < LastPos) {
1261         // Reordered?
1262         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1263                         SourceMgr::DK_Error,
1264                         Prefix + "-DAG: found a match of CHECK-DAG"
1265                                  " reordering across a CHECK-NOT");
1266         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1267                         SourceMgr::DK_Note,
1268                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1269                                  " is found here");
1270         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1271                         Prefix + "-NOT: the crossed pattern specified"
1272                                  " here");
1273         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1274                         Prefix + "-DAG: the reordered pattern specified"
1275                                  " here");
1276         return StringRef::npos;
1277       }
1278       // All subsequent CHECK-DAGs should be matched from the farthest
1279       // position of all precedent CHECK-DAGs (not including this one).
1280       StartPos = LastPos;
1281       // Don't waste time checking for (impossible) overlaps before that.
1282       Matches.clear();
1283       Matches.push_back(Match{MatchPos, MatchPos + MatchLen});
1284       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1285       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1286       // region.
1287       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1288       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1289         return StringRef::npos;
1290       // Clear "not strings".
1291       NotStrings.clear();
1292     }
1293 
1294     // Update the last position with CHECK-DAG matches.
1295     LastPos = std::max(MatchPos + MatchLen, LastPos);
1296   }
1297 
1298   return LastPos;
1299 }
1300 
1301 // A check prefix must contain only alphanumeric, hyphens and underscores.
1302 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1303   Regex Validator("^[a-zA-Z0-9_-]*$");
1304   return Validator.match(CheckPrefix);
1305 }
1306 
1307 static bool ValidateCheckPrefixes() {
1308   StringSet<> PrefixSet;
1309 
1310   for (StringRef Prefix : CheckPrefixes) {
1311     // Reject empty prefixes.
1312     if (Prefix == "")
1313       return false;
1314 
1315     if (!PrefixSet.insert(Prefix).second)
1316       return false;
1317 
1318     if (!ValidateCheckPrefix(Prefix))
1319       return false;
1320   }
1321 
1322   return true;
1323 }
1324 
1325 // Combines the check prefixes into a single regex so that we can efficiently
1326 // scan for any of the set.
1327 //
1328 // The semantics are that the longest-match wins which matches our regex
1329 // library.
1330 static Regex buildCheckPrefixRegex() {
1331   // I don't think there's a way to specify an initial value for cl::list,
1332   // so if nothing was specified, add the default
1333   if (CheckPrefixes.empty())
1334     CheckPrefixes.push_back("CHECK");
1335 
1336   // We already validated the contents of CheckPrefixes so just concatenate
1337   // them as alternatives.
1338   SmallString<32> PrefixRegexStr;
1339   for (StringRef Prefix : CheckPrefixes) {
1340     if (Prefix != CheckPrefixes.front())
1341       PrefixRegexStr.push_back('|');
1342 
1343     PrefixRegexStr.append(Prefix);
1344   }
1345 
1346   return Regex(PrefixRegexStr);
1347 }
1348 
1349 static void DumpCommandLine(int argc, char **argv) {
1350   errs() << "FileCheck command line: ";
1351   for (int I = 0; I < argc; I++)
1352     errs() << " " << argv[I];
1353   errs() << "\n";
1354 }
1355 
1356 // Remove local variables from \p VariableTable. Global variables
1357 // (start with '$') are preserved.
1358 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1359   SmallVector<StringRef, 16> LocalVars;
1360   for (const auto &Var : VariableTable)
1361     if (Var.first()[0] != '$')
1362       LocalVars.push_back(Var.first());
1363 
1364   for (const auto &Var : LocalVars)
1365     VariableTable.erase(Var);
1366 }
1367 
1368 /// Check the input to FileCheck provided in the \p Buffer against the \p
1369 /// CheckStrings read from the check file.
1370 ///
1371 /// Returns false if the input fails to satisfy the checks.
1372 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1373                 ArrayRef<CheckString> CheckStrings) {
1374   bool ChecksFailed = false;
1375 
1376   /// VariableTable - This holds all the current filecheck variables.
1377   StringMap<StringRef> VariableTable;
1378 
1379   for (const auto& Def : GlobalDefines)
1380     VariableTable.insert(StringRef(Def).split('='));
1381 
1382   unsigned i = 0, j = 0, e = CheckStrings.size();
1383   while (true) {
1384     StringRef CheckRegion;
1385     if (j == e) {
1386       CheckRegion = Buffer;
1387     } else {
1388       const CheckString &CheckLabelStr = CheckStrings[j];
1389       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1390         ++j;
1391         continue;
1392       }
1393 
1394       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1395       size_t MatchLabelLen = 0;
1396       size_t MatchLabelPos =
1397           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1398       if (MatchLabelPos == StringRef::npos)
1399         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1400         return false;
1401 
1402       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1403       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1404       ++j;
1405     }
1406 
1407     if (EnableVarScope)
1408       ClearLocalVars(VariableTable);
1409 
1410     for (; i != j; ++i) {
1411       const CheckString &CheckStr = CheckStrings[i];
1412 
1413       // Check each string within the scanned region, including a second check
1414       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1415       size_t MatchLen = 0;
1416       size_t MatchPos =
1417           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1418 
1419       if (MatchPos == StringRef::npos) {
1420         ChecksFailed = true;
1421         i = j;
1422         break;
1423       }
1424 
1425       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1426     }
1427 
1428     if (j == e)
1429       break;
1430   }
1431 
1432   // Success if no checks failed.
1433   return !ChecksFailed;
1434 }
1435 
1436 int main(int argc, char **argv) {
1437   InitLLVM X(argc, argv);
1438   cl::ParseCommandLineOptions(argc, argv);
1439 
1440   if (!ValidateCheckPrefixes()) {
1441     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1442               "start with a letter and contain only alphanumeric characters, "
1443               "hyphens and underscores\n";
1444     return 2;
1445   }
1446 
1447   Regex PrefixRE = buildCheckPrefixRegex();
1448   std::string REError;
1449   if (!PrefixRE.isValid(REError)) {
1450     errs() << "Unable to combine check-prefix strings into a prefix regular "
1451               "expression! This is likely a bug in FileCheck's verification of "
1452               "the check-prefix strings. Regular expression parsing failed "
1453               "with the following error: "
1454            << REError << "\n";
1455     return 2;
1456   }
1457 
1458   SourceMgr SM;
1459 
1460   // Read the expected strings from the check file.
1461   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1462       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1463   if (std::error_code EC = CheckFileOrErr.getError()) {
1464     errs() << "Could not open check file '" << CheckFilename
1465            << "': " << EC.message() << '\n';
1466     return 2;
1467   }
1468   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1469 
1470   SmallString<4096> CheckFileBuffer;
1471   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1472 
1473   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1474                             CheckFileText, CheckFile.getBufferIdentifier()),
1475                         SMLoc());
1476 
1477   std::vector<CheckString> CheckStrings;
1478   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1479     return 2;
1480 
1481   // Open the file to check and add it to SourceMgr.
1482   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1483       MemoryBuffer::getFileOrSTDIN(InputFilename);
1484   if (std::error_code EC = InputFileOrErr.getError()) {
1485     errs() << "Could not open input file '" << InputFilename
1486            << "': " << EC.message() << '\n';
1487     return 2;
1488   }
1489   MemoryBuffer &InputFile = *InputFileOrErr.get();
1490 
1491   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1492     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1493     DumpCommandLine(argc, argv);
1494     return 2;
1495   }
1496 
1497   SmallString<4096> InputFileBuffer;
1498   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1499 
1500   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1501                             InputFileText, InputFile.getBufferIdentifier()),
1502                         SMLoc());
1503 
1504   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1505 }
1506