xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 1714676ae0b342663173f5625853f1461a4efb65)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
// Command-line options. Each object below registers one option with LLVM's
// CommandLine library; the rest of the file reads them as ordinary globals.

// Required positional argument: path of the file containing the check lines.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

// File whose contents are verified. The default value "-" is described to the
// user as standard input.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

// Check prefixes to look for; the option may be given several times.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// Alias of -check-prefix that additionally accepts a comma separated list.
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));

// When set, runs of spaces/tabs are NOT collapsed by CanonicalizeFile, and
// ParsePattern does not add optional-space padding around full-line matches.
static cl::opt<bool> NoCanonicalizeWhiteSpace(
    "strict-whitespace",
    cl::desc("Do not treat all horizontal whitespace as equivalent"));

// Patterns that ReadCheckFile turns into implicit CHECK-NOT patterns.
static cl::list<std::string> ImplicitCheckNot(
    "implicit-check-not",
    cl::desc("Add an implicit negative check with this pattern to every\n"
             "positive check. This can be used to ensure that no instances of\n"
             "this pattern occur which are not matched by a positive pattern"),
    cl::value_desc("pattern"));

// NOTE(review): not read anywhere in this part of the file; presumably
// consulted by the driver code further down.
static cl::opt<bool> AllowEmptyInput(
    "allow-empty", cl::init(false),
    cl::desc("Allow the input file to be empty. This is useful when making\n"
             "checks that some error message does not occur, for example."));

// When set, ParsePattern anchors every non-CHECK-NOT pattern with '^' and '$'.
static cl::opt<bool> MatchFullLines(
    "match-full-lines", cl::init(false),
    cl::desc("Require all positive matches to cover an entire input line.\n"
             "Allows leading and trailing whitespace if --strict-whitespace\n"
             "is not also passed."));

// Iterator type over a list-of-strings option such as CheckPrefixes.
typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
namespace Check {
/// The kind of directive a check line carries. FindCheckType derives it from
/// the text that follows the check prefix.
enum CheckType {
  /// No valid directive follows the prefix.
  CheckNone = 0,
  /// "PREFIX:"
  CheckPlain,
  /// "PREFIX-NEXT:"
  CheckNext,
  /// "PREFIX-SAME:"
  CheckSame,
  /// "PREFIX-NOT:"
  CheckNot,
  /// "PREFIX-DAG:"
  CheckDAG,
  /// "PREFIX-LABEL:"
  CheckLabel,

  /// Indicates the pattern only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  CheckEOF,

  /// Marks when parsing found a -NOT check combined with another CHECK suffix
  /// (e.g. "PREFIX-DAG-NOT:" or "PREFIX-NOT-NEXT:"), which is rejected.
  CheckBadNot
};
}
100 
101 class Pattern {
102   SMLoc PatternLoc;
103 
104   /// A fixed string to match as the pattern or empty if this pattern requires
105   /// a regex match.
106   StringRef FixedStr;
107 
108   /// A regex string to match as the pattern or empty if this pattern requires
109   /// a fixed string to match.
110   std::string RegExStr;
111 
112   /// Entries in this vector map to uses of a variable in the pattern, e.g.
113   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
114   /// we'll get an entry in this vector that tells us to insert the value of
115   /// bar at offset 3.
116   std::vector<std::pair<StringRef, unsigned>> VariableUses;
117 
118   /// Maps definitions of variables to their parenthesized capture numbers.
119   ///
120   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
121   /// 1.
122   std::map<StringRef, unsigned> VariableDefs;
123 
124   Check::CheckType CheckTy;
125 
126   /// Contains the number of line this pattern is in.
127   unsigned LineNumber;
128 
129 public:
130   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
131 
132   /// Returns the location in source code.
133   SMLoc getLoc() const { return PatternLoc; }
134 
135   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
136                     unsigned LineNumber);
137   size_t Match(StringRef Buffer, size_t &MatchLen,
138                StringMap<StringRef> &VariableTable) const;
139   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
140                         const StringMap<StringRef> &VariableTable) const;
141 
142   bool hasVariable() const {
143     return !(VariableUses.empty() && VariableDefs.empty());
144   }
145 
146   Check::CheckType getCheckTy() const { return CheckTy; }
147 
148 private:
149   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
150   void AddBackrefToRegEx(unsigned BackrefNum);
151   unsigned
152   ComputeMatchDistance(StringRef Buffer,
153                        const StringMap<StringRef> &VariableTable) const;
154   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
155   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
156 };
157 
158 /// Parses the given string into the Pattern.
159 ///
160 /// \p Prefix provides which prefix is being matched, \p SM provides the
161 /// SourceMgr used for error reports, and \p LineNumber is the line number in
162 /// the input file from which the pattern string was read. Returns true in
163 /// case of an error, false otherwise.
164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
165                            SourceMgr &SM, unsigned LineNumber) {
166   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
167 
168   this->LineNumber = LineNumber;
169   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
170 
171   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
172     // Ignore trailing whitespace.
173     while (!PatternStr.empty() &&
174            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
175       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
176 
177   // Check that there is something on the line.
178   if (PatternStr.empty()) {
179     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
180                     "found empty check string with prefix '" + Prefix + ":'");
181     return true;
182   }
183 
184   // Check to see if this is a fixed string, or if it has regex pieces.
185   if (!MatchFullLinesHere &&
186       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
187                                  PatternStr.find("[[") == StringRef::npos))) {
188     FixedStr = PatternStr;
189     return false;
190   }
191 
192   if (MatchFullLinesHere) {
193     RegExStr += '^';
194     if (!NoCanonicalizeWhiteSpace)
195       RegExStr += " *";
196   }
197 
198   // Paren value #0 is for the fully matched string.  Any new parenthesized
199   // values add from there.
200   unsigned CurParen = 1;
201 
202   // Otherwise, there is at least one regex piece.  Build up the regex pattern
203   // by escaping scary characters in fixed strings, building up one big regex.
204   while (!PatternStr.empty()) {
205     // RegEx matches.
206     if (PatternStr.startswith("{{")) {
207       // This is the start of a regex match.  Scan for the }}.
208       size_t End = PatternStr.find("}}");
209       if (End == StringRef::npos) {
210         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
211                         SourceMgr::DK_Error,
212                         "found start of regex string with no end '}}'");
213         return true;
214       }
215 
216       // Enclose {{}} patterns in parens just like [[]] even though we're not
217       // capturing the result for any purpose.  This is required in case the
218       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
219       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
220       RegExStr += '(';
221       ++CurParen;
222 
223       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
224         return true;
225       RegExStr += ')';
226 
227       PatternStr = PatternStr.substr(End + 2);
228       continue;
229     }
230 
231     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
232     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
233     // second form is [[foo]] which is a reference to foo.  The variable name
234     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
235     // it.  This is to catch some common errors.
236     if (PatternStr.startswith("[[")) {
237       // Find the closing bracket pair ending the match.  End is going to be an
238       // offset relative to the beginning of the match string.
239       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
240 
241       if (End == StringRef::npos) {
242         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
243                         SourceMgr::DK_Error,
244                         "invalid named regex reference, no ]] found");
245         return true;
246       }
247 
248       StringRef MatchStr = PatternStr.substr(2, End);
249       PatternStr = PatternStr.substr(End + 4);
250 
251       // Get the regex name (e.g. "foo").
252       size_t NameEnd = MatchStr.find(':');
253       StringRef Name = MatchStr.substr(0, NameEnd);
254 
255       if (Name.empty()) {
256         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
257                         "invalid name in named regex: empty name");
258         return true;
259       }
260 
261       // Verify that the name/expression is well formed. FileCheck currently
262       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
263       // is relaxed, more strict check is performed in \c EvaluateExpression.
264       bool IsExpression = false;
265       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
266         if (i == 0 && Name[i] == '@') {
267           if (NameEnd != StringRef::npos) {
268             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
269                             SourceMgr::DK_Error,
270                             "invalid name in named regex definition");
271             return true;
272           }
273           IsExpression = true;
274           continue;
275         }
276         if (Name[i] != '_' && !isalnum(Name[i]) &&
277             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
278           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
279                           SourceMgr::DK_Error, "invalid name in named regex");
280           return true;
281         }
282       }
283 
284       // Name can't start with a digit.
285       if (isdigit(static_cast<unsigned char>(Name[0]))) {
286         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
287                         "invalid name in named regex");
288         return true;
289       }
290 
291       // Handle [[foo]].
292       if (NameEnd == StringRef::npos) {
293         // Handle variables that were defined earlier on the same line by
294         // emitting a backreference.
295         if (VariableDefs.find(Name) != VariableDefs.end()) {
296           unsigned VarParenNum = VariableDefs[Name];
297           if (VarParenNum < 1 || VarParenNum > 9) {
298             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
299                             SourceMgr::DK_Error,
300                             "Can't back-reference more than 9 variables");
301             return true;
302           }
303           AddBackrefToRegEx(VarParenNum);
304         } else {
305           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
306         }
307         continue;
308       }
309 
310       // Handle [[foo:.*]].
311       VariableDefs[Name] = CurParen;
312       RegExStr += '(';
313       ++CurParen;
314 
315       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
316         return true;
317 
318       RegExStr += ')';
319     }
320 
321     // Handle fixed string matches.
322     // Find the end, which is the start of the next regex.
323     size_t FixedMatchEnd = PatternStr.find("{{");
324     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
325     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
326     PatternStr = PatternStr.substr(FixedMatchEnd);
327   }
328 
329   if (MatchFullLinesHere) {
330     if (!NoCanonicalizeWhiteSpace)
331       RegExStr += " *";
332     RegExStr += '$';
333   }
334 
335   return false;
336 }
337 
338 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
339   Regex R(RS);
340   std::string Error;
341   if (!R.isValid(Error)) {
342     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
343                     "invalid regex: " + Error);
344     return true;
345   }
346 
347   RegExStr += RS.str();
348   CurParen += R.getNumMatches();
349   return false;
350 }
351 
352 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
353   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
354   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
355   RegExStr += Backref;
356 }
357 
358 /// Evaluates expression and stores the result to \p Value.
359 ///
360 /// Returns true on success and false when the expression has invalid syntax.
361 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
362   // The only supported expression is @LINE([\+-]\d+)?
363   if (!Expr.startswith("@LINE"))
364     return false;
365   Expr = Expr.substr(StringRef("@LINE").size());
366   int Offset = 0;
367   if (!Expr.empty()) {
368     if (Expr[0] == '+')
369       Expr = Expr.substr(1);
370     else if (Expr[0] != '-')
371       return false;
372     if (Expr.getAsInteger(10, Offset))
373       return false;
374   }
375   Value = llvm::itostr(LineNumber + Offset);
376   return true;
377 }
378 
379 /// Matches the pattern string against the input buffer \p Buffer
380 ///
381 /// This returns the position that is matched or npos if there is no match. If
382 /// there is a match, the size of the matched string is returned in \p
383 /// MatchLen.
384 ///
385 /// The \p VariableTable StringMap provides the current values of filecheck
386 /// variables and is updated if this match defines new values.
387 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
388                       StringMap<StringRef> &VariableTable) const {
389   // If this is the EOF pattern, match it immediately.
390   if (CheckTy == Check::CheckEOF) {
391     MatchLen = 0;
392     return Buffer.size();
393   }
394 
395   // If this is a fixed string pattern, just match it now.
396   if (!FixedStr.empty()) {
397     MatchLen = FixedStr.size();
398     return Buffer.find(FixedStr);
399   }
400 
401   // Regex match.
402 
403   // If there are variable uses, we need to create a temporary string with the
404   // actual value.
405   StringRef RegExToMatch = RegExStr;
406   std::string TmpStr;
407   if (!VariableUses.empty()) {
408     TmpStr = RegExStr;
409 
410     unsigned InsertOffset = 0;
411     for (const auto &VariableUse : VariableUses) {
412       std::string Value;
413 
414       if (VariableUse.first[0] == '@') {
415         if (!EvaluateExpression(VariableUse.first, Value))
416           return StringRef::npos;
417       } else {
418         StringMap<StringRef>::iterator it =
419             VariableTable.find(VariableUse.first);
420         // If the variable is undefined, return an error.
421         if (it == VariableTable.end())
422           return StringRef::npos;
423 
424         // Look up the value and escape it so that we can put it into the regex.
425         Value += Regex::escape(it->second);
426       }
427 
428       // Plop it into the regex at the adjusted offset.
429       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
430                     Value.begin(), Value.end());
431       InsertOffset += Value.size();
432     }
433 
434     // Match the newly constructed regex.
435     RegExToMatch = TmpStr;
436   }
437 
438   SmallVector<StringRef, 4> MatchInfo;
439   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
440     return StringRef::npos;
441 
442   // Successful regex match.
443   assert(!MatchInfo.empty() && "Didn't get any match");
444   StringRef FullMatch = MatchInfo[0];
445 
446   // If this defines any variables, remember their values.
447   for (const auto &VariableDef : VariableDefs) {
448     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
449     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
450   }
451 
452   MatchLen = FullMatch.size();
453   return FullMatch.data() - Buffer.data();
454 }
455 
456 
457 /// Computes an arbitrary estimate for the quality of matching this pattern at
458 /// the start of \p Buffer; a distance of zero should correspond to a perfect
459 /// match.
460 unsigned
461 Pattern::ComputeMatchDistance(StringRef Buffer,
462                               const StringMap<StringRef> &VariableTable) const {
463   // Just compute the number of matching characters. For regular expressions, we
464   // just compare against the regex itself and hope for the best.
465   //
466   // FIXME: One easy improvement here is have the regex lib generate a single
467   // example regular expression which matches, and use that as the example
468   // string.
469   StringRef ExampleString(FixedStr);
470   if (ExampleString.empty())
471     ExampleString = RegExStr;
472 
473   // Only compare up to the first line in the buffer, or the string size.
474   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
475   BufferPrefix = BufferPrefix.split('\n').first;
476   return BufferPrefix.edit_distance(ExampleString);
477 }
478 
479 /// Prints additional information about a failure to match involving this
480 /// pattern.
481 void Pattern::PrintFailureInfo(
482     const SourceMgr &SM, StringRef Buffer,
483     const StringMap<StringRef> &VariableTable) const {
484   // If this was a regular expression using variables, print the current
485   // variable values.
486   if (!VariableUses.empty()) {
487     for (const auto &VariableUse : VariableUses) {
488       SmallString<256> Msg;
489       raw_svector_ostream OS(Msg);
490       StringRef Var = VariableUse.first;
491       if (Var[0] == '@') {
492         std::string Value;
493         if (EvaluateExpression(Var, Value)) {
494           OS << "with expression \"";
495           OS.write_escaped(Var) << "\" equal to \"";
496           OS.write_escaped(Value) << "\"";
497         } else {
498           OS << "uses incorrect expression \"";
499           OS.write_escaped(Var) << "\"";
500         }
501       } else {
502         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
503 
504         // Check for undefined variable references.
505         if (it == VariableTable.end()) {
506           OS << "uses undefined variable \"";
507           OS.write_escaped(Var) << "\"";
508         } else {
509           OS << "with variable \"";
510           OS.write_escaped(Var) << "\" equal to \"";
511           OS.write_escaped(it->second) << "\"";
512         }
513       }
514 
515       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
516                       OS.str());
517     }
518   }
519 
520   // Attempt to find the closest/best fuzzy match.  Usually an error happens
521   // because some string in the output didn't exactly match. In these cases, we
522   // would like to show the user a best guess at what "should have" matched, to
523   // save them having to actually check the input manually.
524   size_t NumLinesForward = 0;
525   size_t Best = StringRef::npos;
526   double BestQuality = 0;
527 
528   // Use an arbitrary 4k limit on how far we will search.
529   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
530     if (Buffer[i] == '\n')
531       ++NumLinesForward;
532 
533     // Patterns have leading whitespace stripped, so skip whitespace when
534     // looking for something which looks like a pattern.
535     if (Buffer[i] == ' ' || Buffer[i] == '\t')
536       continue;
537 
538     // Compute the "quality" of this match as an arbitrary combination of the
539     // match distance and the number of lines skipped to get to this match.
540     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
541     double Quality = Distance + (NumLinesForward / 100.);
542 
543     if (Quality < BestQuality || Best == StringRef::npos) {
544       Best = i;
545       BestQuality = Quality;
546     }
547   }
548 
549   // Print the "possible intended match here" line if we found something
550   // reasonable and not equal to what we showed in the "scanning from here"
551   // line.
552   if (Best && Best != StringRef::npos && BestQuality < 50) {
553     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
554                     SourceMgr::DK_Note, "possible intended match here");
555 
556     // FIXME: If we wanted to be really friendly we would show why the match
557     // failed, as it can be hard to spot simple one character differences.
558   }
559 }
560 
561 /// Finds the closing sequence of a regex variable usage or definition.
562 ///
563 /// \p Str has to point in the beginning of the definition (right after the
564 /// opening sequence). Returns the offset of the closing sequence within Str,
565 /// or npos if it was not found.
566 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
567   // Offset keeps track of the current offset within the input Str
568   size_t Offset = 0;
569   // [...] Nesting depth
570   size_t BracketDepth = 0;
571 
572   while (!Str.empty()) {
573     if (Str.startswith("]]") && BracketDepth == 0)
574       return Offset;
575     if (Str[0] == '\\') {
576       // Backslash escapes the next char within regexes, so skip them both.
577       Str = Str.substr(2);
578       Offset += 2;
579     } else {
580       switch (Str[0]) {
581       default:
582         break;
583       case '[':
584         BracketDepth++;
585         break;
586       case ']':
587         if (BracketDepth == 0) {
588           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
589                           SourceMgr::DK_Error,
590                           "missing closing \"]\" for regex variable");
591           exit(1);
592         }
593         BracketDepth--;
594         break;
595       }
596       Str = Str.substr(1);
597       Offset++;
598     }
599   }
600 
601   return StringRef::npos;
602 }
603 
604 //===----------------------------------------------------------------------===//
605 // Check Strings.
606 //===----------------------------------------------------------------------===//
607 
/// A check that we found in the input file.
///
/// Bundles the parsed pattern with the prefix and source location it came
/// from. The member functions are only declared here; their definitions are
/// outside this part of the file.
struct CheckString {
  /// The pattern to match.
  Pattern Pat;

  /// Which prefix name this check matched.
  StringRef Prefix;

  /// The location in the match file that the check string was specified.
  SMLoc Loc;

  /// All of the strings that are disallowed from occurring between this match
  /// string and the previous one (or start of file).
  std::vector<Pattern> DagNotStrings;

  /// Stores a copy of \p P together with the prefix \p S and location \p L.
  /// DagNotStrings starts out empty.
  CheckString(const Pattern &P, StringRef S, SMLoc L)
      : Pat(P), Prefix(S), Loc(L) {}

  // NOTE(review): presumably the main entry point that matches this check
  // against Buffer and returns the match position, with the length in
  // MatchLen -- confirm against the definition.
  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
               size_t &MatchLen, StringMap<StringRef> &VariableTable) const;

  // NOTE(review): by their names these verify the NEXT, SAME, NOT and DAG
  // constraints respectively; the return-value conventions are not visible
  // here and should be checked against the definitions.
  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
                const std::vector<const Pattern *> &NotStrings,
                StringMap<StringRef> &VariableTable) const;
  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
                  std::vector<const Pattern *> &NotStrings,
                  StringMap<StringRef> &VariableTable) const;
};
638 
639 /// Canonicalize whitespaces in the file. Line endings are replaced with
640 /// UNIX-style '\n'.
641 static StringRef CanonicalizeFile(MemoryBuffer &MB,
642                                   SmallVectorImpl<char> &OutputBuffer) {
643   OutputBuffer.reserve(MB.getBufferSize());
644 
645   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
646        Ptr != End; ++Ptr) {
647     // Eliminate trailing dosish \r.
648     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
649       continue;
650     }
651 
652     // If current char is not a horizontal whitespace or if horizontal
653     // whitespace canonicalization is disabled, dump it to output as is.
654     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
655       OutputBuffer.push_back(*Ptr);
656       continue;
657     }
658 
659     // Otherwise, add one space and advance over neighboring space.
660     OutputBuffer.push_back(' ');
661     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
662       ++Ptr;
663   }
664 
665   // Add a null byte and then return all but that byte.
666   OutputBuffer.push_back('\0');
667   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
668 }
669 
/// Returns true if \p c can be part of a check-prefix-like word, i.e. it is
/// alphanumeric, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require a value representable as unsigned char;
  // passing a negative plain char (any non-ASCII byte where char is signed)
  // is undefined behavior, so convert first.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
673 
674 // Get the size of the prefix extension.
675 static size_t CheckTypeSize(Check::CheckType Ty) {
676   switch (Ty) {
677   case Check::CheckNone:
678   case Check::CheckBadNot:
679     return 0;
680 
681   case Check::CheckPlain:
682     return sizeof(":") - 1;
683 
684   case Check::CheckNext:
685     return sizeof("-NEXT:") - 1;
686 
687   case Check::CheckSame:
688     return sizeof("-SAME:") - 1;
689 
690   case Check::CheckNot:
691     return sizeof("-NOT:") - 1;
692 
693   case Check::CheckDAG:
694     return sizeof("-DAG:") - 1;
695 
696   case Check::CheckLabel:
697     return sizeof("-LABEL:") - 1;
698 
699   case Check::CheckEOF:
700     llvm_unreachable("Should not be using EOF size");
701   }
702 
703   llvm_unreachable("Bad check type");
704 }
705 
706 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
707   char NextChar = Buffer[Prefix.size()];
708 
709   // Verify that the : is present after the prefix.
710   if (NextChar == ':')
711     return Check::CheckPlain;
712 
713   if (NextChar != '-')
714     return Check::CheckNone;
715 
716   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
717   if (Rest.startswith("NEXT:"))
718     return Check::CheckNext;
719 
720   if (Rest.startswith("SAME:"))
721     return Check::CheckSame;
722 
723   if (Rest.startswith("NOT:"))
724     return Check::CheckNot;
725 
726   if (Rest.startswith("DAG:"))
727     return Check::CheckDAG;
728 
729   if (Rest.startswith("LABEL:"))
730     return Check::CheckLabel;
731 
732   // You can't combine -NOT with another suffix.
733   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
734       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
735       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
736     return Check::CheckBadNot;
737 
738   return Check::CheckNone;
739 }
740 
741 // From the given position, find the next character after the word.
742 static size_t SkipWord(StringRef Str, size_t Loc) {
743   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
744     ++Loc;
745   return Loc;
746 }
747 
748 /// Search the buffer for the first prefix in the prefix regular expression.
749 ///
750 /// This searches the buffer using the provided regular expression, however it
751 /// enforces constraints beyond that:
752 /// 1) The found prefix must not be a suffix of something that looks like
753 ///    a valid prefix.
754 /// 2) The found prefix must be followed by a valid check type suffix using \c
755 ///    FindCheckType above.
756 ///
757 /// The first match of the regular expression to satisfy these two is returned,
758 /// otherwise an empty StringRef is returned to indicate failure.
759 ///
760 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
761 /// start at the beginning of the returned prefix, increment \p LineNumber for
762 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
763 /// check found by examining the suffix.
764 ///
765 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
766 /// is unspecified.
767 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
768                                          unsigned &LineNumber,
769                                          Check::CheckType &CheckTy) {
770   SmallVector<StringRef, 2> Matches;
771 
772   while (!Buffer.empty()) {
773     // Find the first (longest) match using the RE.
774     if (!PrefixRE.match(Buffer, &Matches))
775       // No match at all, bail.
776       return StringRef();
777 
778     StringRef Prefix = Matches[0];
779     Matches.clear();
780 
781     assert(Prefix.data() >= Buffer.data() &&
782            Prefix.data() < Buffer.data() + Buffer.size() &&
783            "Prefix doesn't start inside of buffer!");
784     size_t Loc = Prefix.data() - Buffer.data();
785     StringRef Skipped = Buffer.substr(0, Loc);
786     Buffer = Buffer.drop_front(Loc);
787     LineNumber += Skipped.count('\n');
788 
789     // Check that the matched prefix isn't a suffix of some other check-like
790     // word.
791     // FIXME: This is a very ad-hoc check. it would be better handled in some
792     // other way. Among other things it seems hard to distinguish between
793     // intentional and unintentional uses of this feature.
794     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
795       // Now extract the type.
796       CheckTy = FindCheckType(Buffer, Prefix);
797 
798       // If we've found a valid check type for this prefix, we're done.
799       if (CheckTy != Check::CheckNone)
800         return Prefix;
801     }
802 
803     // If we didn't successfully find a prefix, we need to skip this invalid
804     // prefix and continue scanning. We directly skip the prefix that was
805     // matched and any additional parts of that check-like word.
806     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
807   }
808 
809   // We ran out of buffer while skipping partial matches so give up.
810   return StringRef();
811 }
812 
813 /// Read the check file, which specifies the sequence of expected strings.
814 ///
815 /// The strings are added to the CheckStrings vector. Returns true in case of
816 /// an error, false otherwise.
817 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
818                           std::vector<CheckString> &CheckStrings) {
819   std::vector<Pattern> ImplicitNegativeChecks;
820   for (const auto &PatternString : ImplicitCheckNot) {
821     // Create a buffer with fake command line content in order to display the
822     // command line option responsible for the specific implicit CHECK-NOT.
823     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
824     std::string Suffix = "'";
825     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
826         Prefix + PatternString + Suffix, "command line");
827 
828     StringRef PatternInBuffer =
829         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
830     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
831 
832     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
833     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
834                                                "IMPLICIT-CHECK", SM, 0);
835   }
836 
837   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
838 
839   // LineNumber keeps track of the line on which CheckPrefix instances are
840   // found.
841   unsigned LineNumber = 1;
842 
843   while (1) {
844     Check::CheckType CheckTy;
845 
846     // See if a prefix occurs in the memory buffer.
847     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
848                                                    CheckTy);
849     if (UsedPrefix.empty())
850       break;
851     assert(UsedPrefix.data() == Buffer.data() &&
852            "Failed to move Buffer's start forward, or pointed prefix outside "
853            "of the buffer!");
854 
855     // Location to use for error messages.
856     const char *UsedPrefixStart = UsedPrefix.data();
857 
858     // Skip the buffer to the end.
859     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
860 
861     // Complain about useful-looking but unsupported suffixes.
862     if (CheckTy == Check::CheckBadNot) {
863       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
864                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
865       return true;
866     }
867 
868     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
869     // leading whitespace.
870     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
871       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
872 
873     // Scan ahead to the end of line.
874     size_t EOL = Buffer.find_first_of("\n\r");
875 
876     // Remember the location of the start of the pattern, for diagnostics.
877     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
878 
879     // Parse the pattern.
880     Pattern P(CheckTy);
881     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
882       return true;
883 
884     // Verify that CHECK-LABEL lines do not define or use variables
885     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
886       SM.PrintMessage(
887           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
888           "found '" + UsedPrefix + "-LABEL:'"
889                                    " with variable definition or use");
890       return true;
891     }
892 
893     Buffer = Buffer.substr(EOL);
894 
895     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
896     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
897         CheckStrings.empty()) {
898       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
899       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
900                       SourceMgr::DK_Error,
901                       "found '" + UsedPrefix + "-" + Type +
902                           "' without previous '" + UsedPrefix + ": line");
903       return true;
904     }
905 
906     // Handle CHECK-DAG/-NOT.
907     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
908       DagNotMatches.push_back(P);
909       continue;
910     }
911 
912     // Okay, add the string we captured to the output vector and move on.
913     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
914     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
915     DagNotMatches = ImplicitNegativeChecks;
916   }
917 
918   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
919   // prefix as a filler for the error message.
920   if (!DagNotMatches.empty()) {
921     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
922                               SMLoc::getFromPointer(Buffer.data()));
923     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
924   }
925 
926   if (CheckStrings.empty()) {
927     errs() << "error: no check strings found with prefix"
928            << (CheckPrefixes.size() > 1 ? "es " : " ");
929     prefix_iterator I = CheckPrefixes.begin();
930     prefix_iterator E = CheckPrefixes.end();
931     if (I != E) {
932       errs() << "\'" << *I << ":'";
933       ++I;
934     }
935     for (; I != E; ++I)
936       errs() << ", \'" << *I << ":'";
937 
938     errs() << '\n';
939     return true;
940   }
941 
942   return false;
943 }
944 
945 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
946                              StringRef Buffer,
947                              StringMap<StringRef> &VariableTable) {
948   // Otherwise, we have an error, emit an error message.
949   SM.PrintMessage(Loc, SourceMgr::DK_Error,
950                   "expected string not found in input");
951 
952   // Print the "scanning from here" line.  If the current position is at the
953   // end of a line, advance to the start of the next line.
954   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
955 
956   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
957                   "scanning from here");
958 
959   // Allow the pattern to print additional information if desired.
960   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
961 }
962 
963 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
964                              StringRef Buffer,
965                              StringMap<StringRef> &VariableTable) {
966   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
967 }
968 
969 /// Count the number of newlines in the specified range.
970 static unsigned CountNumNewlinesBetween(StringRef Range,
971                                         const char *&FirstNewLine) {
972   unsigned NumNewLines = 0;
973   while (1) {
974     // Scan for newline.
975     Range = Range.substr(Range.find_first_of("\n\r"));
976     if (Range.empty())
977       return NumNewLines;
978 
979     ++NumNewLines;
980 
981     // Handle \n\r and \r\n as a single newline.
982     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
983         (Range[0] != Range[1]))
984       Range = Range.substr(1);
985     Range = Range.substr(1);
986 
987     if (NumNewLines == 1)
988       FirstNewLine = Range.begin();
989   }
990 }
991 
992 /// Match check string and its "not strings" and/or "dag strings".
993 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
994                           bool IsLabelScanMode, size_t &MatchLen,
995                           StringMap<StringRef> &VariableTable) const {
996   size_t LastPos = 0;
997   std::vector<const Pattern *> NotStrings;
998 
999   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1000   // bounds; we have not processed variable definitions within the bounded block
1001   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1002   // over the block again (including the last CHECK-LABEL) in normal mode.
1003   if (!IsLabelScanMode) {
1004     // Match "dag strings" (with mixed "not strings" if any).
1005     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1006     if (LastPos == StringRef::npos)
1007       return StringRef::npos;
1008   }
1009 
1010   // Match itself from the last position after matching CHECK-DAG.
1011   StringRef MatchBuffer = Buffer.substr(LastPos);
1012   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1013   if (MatchPos == StringRef::npos) {
1014     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1015     return StringRef::npos;
1016   }
1017 
1018   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1019   // or CHECK-NOT
1020   if (!IsLabelScanMode) {
1021     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1022 
1023     // If this check is a "CHECK-NEXT", verify that the previous match was on
1024     // the previous line (i.e. that there is one newline between them).
1025     if (CheckNext(SM, SkippedRegion))
1026       return StringRef::npos;
1027 
1028     // If this check is a "CHECK-SAME", verify that the previous match was on
1029     // the same line (i.e. that there is no newline between them).
1030     if (CheckSame(SM, SkippedRegion))
1031       return StringRef::npos;
1032 
1033     // If this match had "not strings", verify that they don't exist in the
1034     // skipped region.
1035     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1036       return StringRef::npos;
1037   }
1038 
1039   return LastPos + MatchPos;
1040 }
1041 
1042 /// Verify there is a single line in the given buffer.
1043 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1044   if (Pat.getCheckTy() != Check::CheckNext)
1045     return false;
1046 
1047   // Count the number of newlines between the previous match and this one.
1048   assert(Buffer.data() !=
1049              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1050                                     SMLoc::getFromPointer(Buffer.data())))
1051                  ->getBufferStart() &&
1052          "CHECK-NEXT can't be the first check in a file");
1053 
1054   const char *FirstNewLine = nullptr;
1055   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1056 
1057   if (NumNewLines == 0) {
1058     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1059                     Prefix + "-NEXT: is on the same line as previous match");
1060     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1061                     "'next' match was here");
1062     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1063                     "previous match ended here");
1064     return true;
1065   }
1066 
1067   if (NumNewLines != 1) {
1068     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1069                     Prefix +
1070                         "-NEXT: is not on the line after the previous match");
1071     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1072                     "'next' match was here");
1073     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1074                     "previous match ended here");
1075     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1076                     "non-matching line after previous match is here");
1077     return true;
1078   }
1079 
1080   return false;
1081 }
1082 
1083 /// Verify there is no newline in the given buffer.
1084 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1085   if (Pat.getCheckTy() != Check::CheckSame)
1086     return false;
1087 
1088   // Count the number of newlines between the previous match and this one.
1089   assert(Buffer.data() !=
1090              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1091                                     SMLoc::getFromPointer(Buffer.data())))
1092                  ->getBufferStart() &&
1093          "CHECK-SAME can't be the first check in a file");
1094 
1095   const char *FirstNewLine = nullptr;
1096   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1097 
1098   if (NumNewLines != 0) {
1099     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1100                     Prefix +
1101                         "-SAME: is not on the same line as the previous match");
1102     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1103                     "'next' match was here");
1104     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1105                     "previous match ended here");
1106     return true;
1107   }
1108 
1109   return false;
1110 }
1111 
1112 /// Verify there's no "not strings" in the given buffer.
1113 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1114                            const std::vector<const Pattern *> &NotStrings,
1115                            StringMap<StringRef> &VariableTable) const {
1116   for (const Pattern *Pat : NotStrings) {
1117     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1118 
1119     size_t MatchLen = 0;
1120     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1121 
1122     if (Pos == StringRef::npos)
1123       continue;
1124 
1125     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1126                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1127     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1128                     Prefix + "-NOT: pattern specified here");
1129     return true;
1130   }
1131 
1132   return false;
1133 }
1134 
1135 /// Match "dag strings" and their mixed "not strings".
1136 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1137                              std::vector<const Pattern *> &NotStrings,
1138                              StringMap<StringRef> &VariableTable) const {
1139   if (DagNotStrings.empty())
1140     return 0;
1141 
1142   size_t LastPos = 0;
1143   size_t StartPos = LastPos;
1144 
1145   for (const Pattern &Pat : DagNotStrings) {
1146     assert((Pat.getCheckTy() == Check::CheckDAG ||
1147             Pat.getCheckTy() == Check::CheckNot) &&
1148            "Invalid CHECK-DAG or CHECK-NOT!");
1149 
1150     if (Pat.getCheckTy() == Check::CheckNot) {
1151       NotStrings.push_back(&Pat);
1152       continue;
1153     }
1154 
1155     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1156 
1157     size_t MatchLen = 0, MatchPos;
1158 
1159     // CHECK-DAG always matches from the start.
1160     StringRef MatchBuffer = Buffer.substr(StartPos);
1161     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1162     // With a group of CHECK-DAGs, a single mismatching means the match on
1163     // that group of CHECK-DAGs fails immediately.
1164     if (MatchPos == StringRef::npos) {
1165       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1166       return StringRef::npos;
1167     }
1168     // Re-calc it as the offset relative to the start of the original string.
1169     MatchPos += StartPos;
1170 
1171     if (!NotStrings.empty()) {
1172       if (MatchPos < LastPos) {
1173         // Reordered?
1174         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1175                         SourceMgr::DK_Error,
1176                         Prefix + "-DAG: found a match of CHECK-DAG"
1177                                  " reordering across a CHECK-NOT");
1178         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1179                         SourceMgr::DK_Note,
1180                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1181                                  " is found here");
1182         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1183                         Prefix + "-NOT: the crossed pattern specified"
1184                                  " here");
1185         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1186                         Prefix + "-DAG: the reordered pattern specified"
1187                                  " here");
1188         return StringRef::npos;
1189       }
1190       // All subsequent CHECK-DAGs should be matched from the farthest
1191       // position of all precedent CHECK-DAGs (including this one.)
1192       StartPos = LastPos;
1193       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1194       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1195       // region.
1196       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1197       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1198         return StringRef::npos;
1199       // Clear "not strings".
1200       NotStrings.clear();
1201     }
1202 
1203     // Update the last position with CHECK-DAG matches.
1204     LastPos = std::max(MatchPos + MatchLen, LastPos);
1205   }
1206 
1207   return LastPos;
1208 }
1209 
1210 // A check prefix must contain only alphanumeric, hyphens and underscores.
1211 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1212   Regex Validator("^[a-zA-Z0-9_-]*$");
1213   return Validator.match(CheckPrefix);
1214 }
1215 
1216 static bool ValidateCheckPrefixes() {
1217   StringSet<> PrefixSet;
1218 
1219   for (StringRef Prefix : CheckPrefixes) {
1220     // Reject empty prefixes.
1221     if (Prefix == "")
1222       return false;
1223 
1224     if (!PrefixSet.insert(Prefix).second)
1225       return false;
1226 
1227     if (!ValidateCheckPrefix(Prefix))
1228       return false;
1229   }
1230 
1231   return true;
1232 }
1233 
1234 // Combines the check prefixes into a single regex so that we can efficiently
1235 // scan for any of the set.
1236 //
1237 // The semantics are that the longest-match wins which matches our regex
1238 // library.
1239 static Regex buildCheckPrefixRegex() {
1240   // I don't think there's a way to specify an initial value for cl::list,
1241   // so if nothing was specified, add the default
1242   if (CheckPrefixes.empty())
1243     CheckPrefixes.push_back("CHECK");
1244 
1245   // We already validated the contents of CheckPrefixes so just concatenate
1246   // them as alternatives.
1247   SmallString<32> PrefixRegexStr;
1248   for (StringRef Prefix : CheckPrefixes) {
1249     if (Prefix != CheckPrefixes.front())
1250       PrefixRegexStr.push_back('|');
1251 
1252     PrefixRegexStr.append(Prefix);
1253   }
1254 
1255   return Regex(PrefixRegexStr);
1256 }
1257 
1258 static void DumpCommandLine(int argc, char **argv) {
1259   errs() << "FileCheck command line: ";
1260   for (int I = 0; I < argc; I++)
1261     errs() << " " << argv[I];
1262   errs() << "\n";
1263 }
1264 
1265 /// Check the input to FileCheck provided in the \p Buffer against the \p
1266 /// CheckStrings read from the check file.
1267 ///
1268 /// Returns false if the input fails to satisfy the checks.
1269 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1270                 ArrayRef<CheckString> CheckStrings) {
1271   bool ChecksFailed = false;
1272 
1273   /// VariableTable - This holds all the current filecheck variables.
1274   StringMap<StringRef> VariableTable;
1275 
1276   unsigned i = 0, j = 0, e = CheckStrings.size();
1277   while (true) {
1278     StringRef CheckRegion;
1279     if (j == e) {
1280       CheckRegion = Buffer;
1281     } else {
1282       const CheckString &CheckLabelStr = CheckStrings[j];
1283       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1284         ++j;
1285         continue;
1286       }
1287 
1288       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1289       size_t MatchLabelLen = 0;
1290       size_t MatchLabelPos =
1291           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1292       if (MatchLabelPos == StringRef::npos)
1293         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1294         return false;
1295 
1296       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1297       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1298       ++j;
1299     }
1300 
1301     for (; i != j; ++i) {
1302       const CheckString &CheckStr = CheckStrings[i];
1303 
1304       // Check each string within the scanned region, including a second check
1305       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1306       size_t MatchLen = 0;
1307       size_t MatchPos =
1308           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1309 
1310       if (MatchPos == StringRef::npos) {
1311         ChecksFailed = true;
1312         i = j;
1313         break;
1314       }
1315 
1316       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1317     }
1318 
1319     if (j == e)
1320       break;
1321   }
1322 
1323   // Success if no checks failed.
1324   return !ChecksFailed;
1325 }
1326 
1327 int main(int argc, char **argv) {
1328   sys::PrintStackTraceOnErrorSignal(argv[0]);
1329   PrettyStackTraceProgram X(argc, argv);
1330   cl::ParseCommandLineOptions(argc, argv);
1331 
1332   if (!ValidateCheckPrefixes()) {
1333     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1334               "start with a letter and contain only alphanumeric characters, "
1335               "hyphens and underscores\n";
1336     return 2;
1337   }
1338 
1339   Regex PrefixRE = buildCheckPrefixRegex();
1340   std::string REError;
1341   if (!PrefixRE.isValid(REError)) {
1342     errs() << "Unable to combine check-prefix strings into a prefix regular "
1343               "expression! This is likely a bug in FileCheck's verification of "
1344               "the check-prefix strings. Regular expression parsing failed "
1345               "with the following error: "
1346            << REError << "\n";
1347     return 2;
1348   }
1349 
1350   SourceMgr SM;
1351 
1352   // Read the expected strings from the check file.
1353   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1354       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1355   if (std::error_code EC = CheckFileOrErr.getError()) {
1356     errs() << "Could not open check file '" << CheckFilename
1357            << "': " << EC.message() << '\n';
1358     return 2;
1359   }
1360   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1361 
1362   SmallString<4096> CheckFileBuffer;
1363   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1364 
1365   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1366                             CheckFileText, CheckFile.getBufferIdentifier()),
1367                         SMLoc());
1368 
1369   std::vector<CheckString> CheckStrings;
1370   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1371     return 2;
1372 
1373   // Open the file to check and add it to SourceMgr.
1374   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1375       MemoryBuffer::getFileOrSTDIN(InputFilename);
1376   if (std::error_code EC = InputFileOrErr.getError()) {
1377     errs() << "Could not open input file '" << InputFilename
1378            << "': " << EC.message() << '\n';
1379     return 2;
1380   }
1381   MemoryBuffer &InputFile = *InputFileOrErr.get();
1382 
1383   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1384     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1385     DumpCommandLine(argc, argv);
1386     return 2;
1387   }
1388 
1389   SmallString<4096> InputFileBuffer;
1390   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1391 
1392   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1393                             InputFileText, InputFile.getBufferIdentifier()),
1394                         SMLoc());
1395 
1396   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1397 }
1398