xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision a26bc91456865f8cd039c9ebd72def3a0b4a80f1)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
// Required positional argument: the name of the check file.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

// -input-file: name of the file to check. Initialised to "-"; according to the
// help text this default stands for stdin.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

// -check-prefix: list of prefixes to look for in the check file. The help text
// states that 'CHECK' is used when no prefix is given.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// -check-prefixes: alias of -check-prefix that accepts comma separated values.
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));

// -strict-whitespace: when set, CanonicalizeFile copies horizontal whitespace
// through unchanged and Pattern::ParsePattern does not add the optional
// leading/trailing " *" to full-line patterns.
static cl::opt<bool> NoCanonicalizeWhiteSpace(
    "strict-whitespace",
    cl::desc("Do not treat all horizontal whitespace as equivalent"));

// -implicit-check-not: patterns that ReadCheckFile parses into implicit
// negative (Check::CheckNot) patterns.
static cl::list<std::string> ImplicitCheckNot(
    "implicit-check-not",
    cl::desc("Add an implicit negative check with this pattern to every\n"
             "positive check. This can be used to ensure that no instances of\n"
             "this pattern occur which are not matched by a positive pattern"),
    cl::value_desc("pattern"));

// -allow-empty: off by default; see the help text for the intended use.
static cl::opt<bool> AllowEmptyInput(
    "allow-empty", cl::init(false),
    cl::desc("Allow the input file to be empty. This is useful when making\n"
             "checks that some error message does not occur, for example."));

// -match-full-lines: off by default. When set, Pattern::ParsePattern anchors
// every pattern that is not a CheckNot pattern with '^' and '$'.
static cl::opt<bool> MatchFullLines(
    "match-full-lines", cl::init(false),
    cl::desc("Require all positive matches to cover an entire input line.\n"
             "Allows leading and trailing whitespace if --strict-whitespace\n"
             "is not also passed."));

// Const iterator over a list of prefix strings such as CheckPrefixes.
typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
/// Kinds of check directive, as classified by FindCheckType from the text
/// that follows a prefix in the check file.
namespace Check {
enum CheckType {
  /// No valid check suffix follows the prefix.
  CheckNone = 0,
  /// "PREFIX:" directive.
  CheckPlain,
  /// "PREFIX-NEXT:" directive.
  CheckNext,
  /// "PREFIX-SAME:" directive.
  CheckSame,
  /// "PREFIX-NOT:" directive.
  CheckNot,
  /// "PREFIX-DAG:" directive.
  CheckDAG,
  /// "PREFIX-LABEL:" directive.
  CheckLabel,

  /// Indicates the pattern only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  CheckEOF,

  /// Marks when parsing found a -NOT check combined with another CHECK suffix.
  CheckBadNot
};
}
100 
101 class Pattern {
102   SMLoc PatternLoc;
103 
104   /// A fixed string to match as the pattern or empty if this pattern requires
105   /// a regex match.
106   StringRef FixedStr;
107 
108   /// A regex string to match as the pattern or empty if this pattern requires
109   /// a fixed string to match.
110   std::string RegExStr;
111 
112   /// Entries in this vector map to uses of a variable in the pattern, e.g.
113   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
114   /// we'll get an entry in this vector that tells us to insert the value of
115   /// bar at offset 3.
116   std::vector<std::pair<StringRef, unsigned>> VariableUses;
117 
118   /// Maps definitions of variables to their parenthesized capture numbers.
119   ///
120   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
121   /// 1.
122   std::map<StringRef, unsigned> VariableDefs;
123 
124   Check::CheckType CheckTy;
125 
126   /// Contains the number of line this pattern is in.
127   unsigned LineNumber;
128 
129 public:
130   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
131 
132   /// Returns the location in source code.
133   SMLoc getLoc() const { return PatternLoc; }
134 
135   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
136                     unsigned LineNumber);
137   size_t Match(StringRef Buffer, size_t &MatchLen,
138                StringMap<StringRef> &VariableTable) const;
139   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
140                         const StringMap<StringRef> &VariableTable) const;
141 
142   bool hasVariable() const {
143     return !(VariableUses.empty() && VariableDefs.empty());
144   }
145 
146   Check::CheckType getCheckTy() const { return CheckTy; }
147 
148 private:
149   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
150   void AddBackrefToRegEx(unsigned BackrefNum);
151   unsigned
152   ComputeMatchDistance(StringRef Buffer,
153                        const StringMap<StringRef> &VariableTable) const;
154   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
155   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
156 };
157 
158 /// Parses the given string into the Pattern.
159 ///
160 /// \p Prefix provides which prefix is being matched, \p SM provides the
161 /// SourceMgr used for error reports, and \p LineNumber is the line number in
162 /// the input file from which the pattern string was read. Returns true in
163 /// case of an error, false otherwise.
164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
165                            SourceMgr &SM, unsigned LineNumber) {
166   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
167 
168   this->LineNumber = LineNumber;
169   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
170 
171   // Ignore trailing whitespace.
172   while (!PatternStr.empty() &&
173          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
174     PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
175 
176   // Check that there is something on the line.
177   if (PatternStr.empty()) {
178     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
179                     "found empty check string with prefix '" + Prefix + ":'");
180     return true;
181   }
182 
183   // Check to see if this is a fixed string, or if it has regex pieces.
184   if (!MatchFullLinesHere &&
185       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
186                                  PatternStr.find("[[") == StringRef::npos))) {
187     FixedStr = PatternStr;
188     return false;
189   }
190 
191   if (MatchFullLinesHere) {
192     RegExStr += '^';
193     if (!NoCanonicalizeWhiteSpace)
194       RegExStr += " *";
195   }
196 
197   // Paren value #0 is for the fully matched string.  Any new parenthesized
198   // values add from there.
199   unsigned CurParen = 1;
200 
201   // Otherwise, there is at least one regex piece.  Build up the regex pattern
202   // by escaping scary characters in fixed strings, building up one big regex.
203   while (!PatternStr.empty()) {
204     // RegEx matches.
205     if (PatternStr.startswith("{{")) {
206       // This is the start of a regex match.  Scan for the }}.
207       size_t End = PatternStr.find("}}");
208       if (End == StringRef::npos) {
209         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
210                         SourceMgr::DK_Error,
211                         "found start of regex string with no end '}}'");
212         return true;
213       }
214 
215       // Enclose {{}} patterns in parens just like [[]] even though we're not
216       // capturing the result for any purpose.  This is required in case the
217       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
218       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
219       RegExStr += '(';
220       ++CurParen;
221 
222       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
223         return true;
224       RegExStr += ')';
225 
226       PatternStr = PatternStr.substr(End + 2);
227       continue;
228     }
229 
230     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
231     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
232     // second form is [[foo]] which is a reference to foo.  The variable name
233     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
234     // it.  This is to catch some common errors.
235     if (PatternStr.startswith("[[")) {
236       // Find the closing bracket pair ending the match.  End is going to be an
237       // offset relative to the beginning of the match string.
238       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
239 
240       if (End == StringRef::npos) {
241         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
242                         SourceMgr::DK_Error,
243                         "invalid named regex reference, no ]] found");
244         return true;
245       }
246 
247       StringRef MatchStr = PatternStr.substr(2, End);
248       PatternStr = PatternStr.substr(End + 4);
249 
250       // Get the regex name (e.g. "foo").
251       size_t NameEnd = MatchStr.find(':');
252       StringRef Name = MatchStr.substr(0, NameEnd);
253 
254       if (Name.empty()) {
255         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
256                         "invalid name in named regex: empty name");
257         return true;
258       }
259 
260       // Verify that the name/expression is well formed. FileCheck currently
261       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
262       // is relaxed, more strict check is performed in \c EvaluateExpression.
263       bool IsExpression = false;
264       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
265         if (i == 0 && Name[i] == '@') {
266           if (NameEnd != StringRef::npos) {
267             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
268                             SourceMgr::DK_Error,
269                             "invalid name in named regex definition");
270             return true;
271           }
272           IsExpression = true;
273           continue;
274         }
275         if (Name[i] != '_' && !isalnum(Name[i]) &&
276             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
277           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
278                           SourceMgr::DK_Error, "invalid name in named regex");
279           return true;
280         }
281       }
282 
283       // Name can't start with a digit.
284       if (isdigit(static_cast<unsigned char>(Name[0]))) {
285         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
286                         "invalid name in named regex");
287         return true;
288       }
289 
290       // Handle [[foo]].
291       if (NameEnd == StringRef::npos) {
292         // Handle variables that were defined earlier on the same line by
293         // emitting a backreference.
294         if (VariableDefs.find(Name) != VariableDefs.end()) {
295           unsigned VarParenNum = VariableDefs[Name];
296           if (VarParenNum < 1 || VarParenNum > 9) {
297             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
298                             SourceMgr::DK_Error,
299                             "Can't back-reference more than 9 variables");
300             return true;
301           }
302           AddBackrefToRegEx(VarParenNum);
303         } else {
304           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
305         }
306         continue;
307       }
308 
309       // Handle [[foo:.*]].
310       VariableDefs[Name] = CurParen;
311       RegExStr += '(';
312       ++CurParen;
313 
314       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
315         return true;
316 
317       RegExStr += ')';
318     }
319 
320     // Handle fixed string matches.
321     // Find the end, which is the start of the next regex.
322     size_t FixedMatchEnd = PatternStr.find("{{");
323     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
324     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
325     PatternStr = PatternStr.substr(FixedMatchEnd);
326   }
327 
328   if (MatchFullLinesHere) {
329     if (!NoCanonicalizeWhiteSpace)
330       RegExStr += " *";
331     RegExStr += '$';
332   }
333 
334   return false;
335 }
336 
337 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
338   Regex R(RS);
339   std::string Error;
340   if (!R.isValid(Error)) {
341     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
342                     "invalid regex: " + Error);
343     return true;
344   }
345 
346   RegExStr += RS.str();
347   CurParen += R.getNumMatches();
348   return false;
349 }
350 
351 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
352   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
353   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
354   RegExStr += Backref;
355 }
356 
357 /// Evaluates expression and stores the result to \p Value.
358 ///
359 /// Returns true on success and false when the expression has invalid syntax.
360 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
361   // The only supported expression is @LINE([\+-]\d+)?
362   if (!Expr.startswith("@LINE"))
363     return false;
364   Expr = Expr.substr(StringRef("@LINE").size());
365   int Offset = 0;
366   if (!Expr.empty()) {
367     if (Expr[0] == '+')
368       Expr = Expr.substr(1);
369     else if (Expr[0] != '-')
370       return false;
371     if (Expr.getAsInteger(10, Offset))
372       return false;
373   }
374   Value = llvm::itostr(LineNumber + Offset);
375   return true;
376 }
377 
378 /// Matches the pattern string against the input buffer \p Buffer
379 ///
380 /// This returns the position that is matched or npos if there is no match. If
381 /// there is a match, the size of the matched string is returned in \p
382 /// MatchLen.
383 ///
384 /// The \p VariableTable StringMap provides the current values of filecheck
385 /// variables and is updated if this match defines new values.
386 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
387                       StringMap<StringRef> &VariableTable) const {
388   // If this is the EOF pattern, match it immediately.
389   if (CheckTy == Check::CheckEOF) {
390     MatchLen = 0;
391     return Buffer.size();
392   }
393 
394   // If this is a fixed string pattern, just match it now.
395   if (!FixedStr.empty()) {
396     MatchLen = FixedStr.size();
397     return Buffer.find(FixedStr);
398   }
399 
400   // Regex match.
401 
402   // If there are variable uses, we need to create a temporary string with the
403   // actual value.
404   StringRef RegExToMatch = RegExStr;
405   std::string TmpStr;
406   if (!VariableUses.empty()) {
407     TmpStr = RegExStr;
408 
409     unsigned InsertOffset = 0;
410     for (const auto &VariableUse : VariableUses) {
411       std::string Value;
412 
413       if (VariableUse.first[0] == '@') {
414         if (!EvaluateExpression(VariableUse.first, Value))
415           return StringRef::npos;
416       } else {
417         StringMap<StringRef>::iterator it =
418             VariableTable.find(VariableUse.first);
419         // If the variable is undefined, return an error.
420         if (it == VariableTable.end())
421           return StringRef::npos;
422 
423         // Look up the value and escape it so that we can put it into the regex.
424         Value += Regex::escape(it->second);
425       }
426 
427       // Plop it into the regex at the adjusted offset.
428       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
429                     Value.begin(), Value.end());
430       InsertOffset += Value.size();
431     }
432 
433     // Match the newly constructed regex.
434     RegExToMatch = TmpStr;
435   }
436 
437   SmallVector<StringRef, 4> MatchInfo;
438   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
439     return StringRef::npos;
440 
441   // Successful regex match.
442   assert(!MatchInfo.empty() && "Didn't get any match");
443   StringRef FullMatch = MatchInfo[0];
444 
445   // If this defines any variables, remember their values.
446   for (const auto &VariableDef : VariableDefs) {
447     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
448     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
449   }
450 
451   MatchLen = FullMatch.size();
452   return FullMatch.data() - Buffer.data();
453 }
454 
455 
456 /// Computes an arbitrary estimate for the quality of matching this pattern at
457 /// the start of \p Buffer; a distance of zero should correspond to a perfect
458 /// match.
459 unsigned
460 Pattern::ComputeMatchDistance(StringRef Buffer,
461                               const StringMap<StringRef> &VariableTable) const {
462   // Just compute the number of matching characters. For regular expressions, we
463   // just compare against the regex itself and hope for the best.
464   //
465   // FIXME: One easy improvement here is have the regex lib generate a single
466   // example regular expression which matches, and use that as the example
467   // string.
468   StringRef ExampleString(FixedStr);
469   if (ExampleString.empty())
470     ExampleString = RegExStr;
471 
472   // Only compare up to the first line in the buffer, or the string size.
473   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
474   BufferPrefix = BufferPrefix.split('\n').first;
475   return BufferPrefix.edit_distance(ExampleString);
476 }
477 
478 /// Prints additional information about a failure to match involving this
479 /// pattern.
480 void Pattern::PrintFailureInfo(
481     const SourceMgr &SM, StringRef Buffer,
482     const StringMap<StringRef> &VariableTable) const {
483   // If this was a regular expression using variables, print the current
484   // variable values.
485   if (!VariableUses.empty()) {
486     for (const auto &VariableUse : VariableUses) {
487       SmallString<256> Msg;
488       raw_svector_ostream OS(Msg);
489       StringRef Var = VariableUse.first;
490       if (Var[0] == '@') {
491         std::string Value;
492         if (EvaluateExpression(Var, Value)) {
493           OS << "with expression \"";
494           OS.write_escaped(Var) << "\" equal to \"";
495           OS.write_escaped(Value) << "\"";
496         } else {
497           OS << "uses incorrect expression \"";
498           OS.write_escaped(Var) << "\"";
499         }
500       } else {
501         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
502 
503         // Check for undefined variable references.
504         if (it == VariableTable.end()) {
505           OS << "uses undefined variable \"";
506           OS.write_escaped(Var) << "\"";
507         } else {
508           OS << "with variable \"";
509           OS.write_escaped(Var) << "\" equal to \"";
510           OS.write_escaped(it->second) << "\"";
511         }
512       }
513 
514       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
515                       OS.str());
516     }
517   }
518 
519   // Attempt to find the closest/best fuzzy match.  Usually an error happens
520   // because some string in the output didn't exactly match. In these cases, we
521   // would like to show the user a best guess at what "should have" matched, to
522   // save them having to actually check the input manually.
523   size_t NumLinesForward = 0;
524   size_t Best = StringRef::npos;
525   double BestQuality = 0;
526 
527   // Use an arbitrary 4k limit on how far we will search.
528   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
529     if (Buffer[i] == '\n')
530       ++NumLinesForward;
531 
532     // Patterns have leading whitespace stripped, so skip whitespace when
533     // looking for something which looks like a pattern.
534     if (Buffer[i] == ' ' || Buffer[i] == '\t')
535       continue;
536 
537     // Compute the "quality" of this match as an arbitrary combination of the
538     // match distance and the number of lines skipped to get to this match.
539     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
540     double Quality = Distance + (NumLinesForward / 100.);
541 
542     if (Quality < BestQuality || Best == StringRef::npos) {
543       Best = i;
544       BestQuality = Quality;
545     }
546   }
547 
548   // Print the "possible intended match here" line if we found something
549   // reasonable and not equal to what we showed in the "scanning from here"
550   // line.
551   if (Best && Best != StringRef::npos && BestQuality < 50) {
552     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
553                     SourceMgr::DK_Note, "possible intended match here");
554 
555     // FIXME: If we wanted to be really friendly we would show why the match
556     // failed, as it can be hard to spot simple one character differences.
557   }
558 }
559 
560 /// Finds the closing sequence of a regex variable usage or definition.
561 ///
562 /// \p Str has to point in the beginning of the definition (right after the
563 /// opening sequence). Returns the offset of the closing sequence within Str,
564 /// or npos if it was not found.
565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
566   // Offset keeps track of the current offset within the input Str
567   size_t Offset = 0;
568   // [...] Nesting depth
569   size_t BracketDepth = 0;
570 
571   while (!Str.empty()) {
572     if (Str.startswith("]]") && BracketDepth == 0)
573       return Offset;
574     if (Str[0] == '\\') {
575       // Backslash escapes the next char within regexes, so skip them both.
576       Str = Str.substr(2);
577       Offset += 2;
578     } else {
579       switch (Str[0]) {
580       default:
581         break;
582       case '[':
583         BracketDepth++;
584         break;
585       case ']':
586         if (BracketDepth == 0) {
587           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
588                           SourceMgr::DK_Error,
589                           "missing closing \"]\" for regex variable");
590           exit(1);
591         }
592         BracketDepth--;
593         break;
594       }
595       Str = Str.substr(1);
596       Offset++;
597     }
598   }
599 
600   return StringRef::npos;
601 }
602 
603 //===----------------------------------------------------------------------===//
604 // Check Strings.
605 //===----------------------------------------------------------------------===//
606 
/// A check that we found in the input file.
///
/// Bundles the parsed pattern with the prefix that introduced it, its source
/// location, and the patterns collected in DagNotStrings.
struct CheckString {
  /// The pattern to match.
  Pattern Pat;

  /// Which prefix name this check matched.
  StringRef Prefix;

  /// The location in the match file that the check string was specified.
  SMLoc Loc;

  /// All of the strings that are disallowed from occurring between this match
  /// string and the previous one (or start of file).
  std::vector<Pattern> DagNotStrings;

  /// Stores a copy of pattern \p P together with prefix \p S and location
  /// \p L; DagNotStrings starts out empty.
  CheckString(const Pattern &P, StringRef S, SMLoc L)
      : Pat(P), Prefix(S), Loc(L) {}

  // The matching routines below are only declared here; their definitions
  // appear later in the file.
  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
               size_t &MatchLen, StringMap<StringRef> &VariableTable) const;

  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
                const std::vector<const Pattern *> &NotStrings,
                StringMap<StringRef> &VariableTable) const;
  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
                  std::vector<const Pattern *> &NotStrings,
                  StringMap<StringRef> &VariableTable) const;
};
637 
638 /// Canonicalize whitespaces in the file. Line endings are replaced with
639 /// UNIX-style '\n'.
640 static StringRef CanonicalizeFile(MemoryBuffer &MB,
641                                   SmallVectorImpl<char> &OutputBuffer) {
642   OutputBuffer.reserve(MB.getBufferSize());
643 
644   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
645        Ptr != End; ++Ptr) {
646     // Eliminate trailing dosish \r.
647     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
648       continue;
649     }
650 
651     // If current char is not a horizontal whitespace or if horizontal
652     // whitespace canonicalization is disabled, dump it to output as is.
653     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
654       OutputBuffer.push_back(*Ptr);
655       continue;
656     }
657 
658     // Otherwise, add one space and advance over neighboring space.
659     OutputBuffer.push_back(' ');
660     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
661       ++Ptr;
662   }
663 
664   // Add a null byte and then return all but that byte.
665   OutputBuffer.push_back('\0');
666   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
667 }
668 
/// Returns true if \p c can be part of a check-prefix-like word: an
/// alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // isalnum requires a value representable as unsigned char; a plain char may
  // be negative for bytes outside the ASCII range.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
672 
673 // Get the size of the prefix extension.
674 static size_t CheckTypeSize(Check::CheckType Ty) {
675   switch (Ty) {
676   case Check::CheckNone:
677   case Check::CheckBadNot:
678     return 0;
679 
680   case Check::CheckPlain:
681     return sizeof(":") - 1;
682 
683   case Check::CheckNext:
684     return sizeof("-NEXT:") - 1;
685 
686   case Check::CheckSame:
687     return sizeof("-SAME:") - 1;
688 
689   case Check::CheckNot:
690     return sizeof("-NOT:") - 1;
691 
692   case Check::CheckDAG:
693     return sizeof("-DAG:") - 1;
694 
695   case Check::CheckLabel:
696     return sizeof("-LABEL:") - 1;
697 
698   case Check::CheckEOF:
699     llvm_unreachable("Should not be using EOF size");
700   }
701 
702   llvm_unreachable("Bad check type");
703 }
704 
705 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
706   char NextChar = Buffer[Prefix.size()];
707 
708   // Verify that the : is present after the prefix.
709   if (NextChar == ':')
710     return Check::CheckPlain;
711 
712   if (NextChar != '-')
713     return Check::CheckNone;
714 
715   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
716   if (Rest.startswith("NEXT:"))
717     return Check::CheckNext;
718 
719   if (Rest.startswith("SAME:"))
720     return Check::CheckSame;
721 
722   if (Rest.startswith("NOT:"))
723     return Check::CheckNot;
724 
725   if (Rest.startswith("DAG:"))
726     return Check::CheckDAG;
727 
728   if (Rest.startswith("LABEL:"))
729     return Check::CheckLabel;
730 
731   // You can't combine -NOT with another suffix.
732   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
733       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
734       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
735     return Check::CheckBadNot;
736 
737   return Check::CheckNone;
738 }
739 
740 // From the given position, find the next character after the word.
741 static size_t SkipWord(StringRef Str, size_t Loc) {
742   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
743     ++Loc;
744   return Loc;
745 }
746 
747 /// Search the buffer for the first prefix in the prefix regular expression.
748 ///
749 /// This searches the buffer using the provided regular expression, however it
750 /// enforces constraints beyond that:
751 /// 1) The found prefix must not be a suffix of something that looks like
752 ///    a valid prefix.
753 /// 2) The found prefix must be followed by a valid check type suffix using \c
754 ///    FindCheckType above.
755 ///
756 /// The first match of the regular expression to satisfy these two is returned,
757 /// otherwise an empty StringRef is returned to indicate failure.
758 ///
759 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
760 /// start at the beginning of the returned prefix, increment \p LineNumber for
761 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
762 /// check found by examining the suffix.
763 ///
764 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
765 /// is unspecified.
766 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
767                                          unsigned &LineNumber,
768                                          Check::CheckType &CheckTy) {
769   SmallVector<StringRef, 2> Matches;
770 
771   while (!Buffer.empty()) {
772     // Find the first (longest) match using the RE.
773     if (!PrefixRE.match(Buffer, &Matches))
774       // No match at all, bail.
775       return StringRef();
776 
777     StringRef Prefix = Matches[0];
778     Matches.clear();
779 
780     assert(Prefix.data() >= Buffer.data() &&
781            Prefix.data() < Buffer.data() + Buffer.size() &&
782            "Prefix doesn't start inside of buffer!");
783     size_t Loc = Prefix.data() - Buffer.data();
784     StringRef Skipped = Buffer.substr(0, Loc);
785     Buffer = Buffer.drop_front(Loc);
786     LineNumber += Skipped.count('\n');
787 
788     // Check that the matched prefix isn't a suffix of some other check-like
789     // word.
790     // FIXME: This is a very ad-hoc check. it would be better handled in some
791     // other way. Among other things it seems hard to distinguish between
792     // intentional and unintentional uses of this feature.
793     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
794       // Now extract the type.
795       CheckTy = FindCheckType(Buffer, Prefix);
796 
797       // If we've found a valid check type for this prefix, we're done.
798       if (CheckTy != Check::CheckNone)
799         return Prefix;
800     }
801 
802     // If we didn't successfully find a prefix, we need to skip this invalid
803     // prefix and continue scanning. We directly skip the prefix that was
804     // matched and any additional parts of that check-like word.
805     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
806   }
807 
808   // We ran out of buffer while skipping partial matches so give up.
809   return StringRef();
810 }
811 
812 /// Read the check file, which specifies the sequence of expected strings.
813 ///
814 /// The strings are added to the CheckStrings vector. Returns true in case of
815 /// an error, false otherwise.
816 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
817                           std::vector<CheckString> &CheckStrings) {
818   std::vector<Pattern> ImplicitNegativeChecks;
819   for (const auto &PatternString : ImplicitCheckNot) {
820     // Create a buffer with fake command line content in order to display the
821     // command line option responsible for the specific implicit CHECK-NOT.
822     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
823     std::string Suffix = "'";
824     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
825         Prefix + PatternString + Suffix, "command line");
826 
827     StringRef PatternInBuffer =
828         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
829     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
830 
831     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
832     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
833                                                "IMPLICIT-CHECK", SM, 0);
834   }
835 
836   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
837 
838   // LineNumber keeps track of the line on which CheckPrefix instances are
839   // found.
840   unsigned LineNumber = 1;
841 
842   while (1) {
843     Check::CheckType CheckTy;
844 
845     // See if a prefix occurs in the memory buffer.
846     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
847                                                    CheckTy);
848     if (UsedPrefix.empty())
849       break;
850     assert(UsedPrefix.data() == Buffer.data() &&
851            "Failed to move Buffer's start forward, or pointed prefix outside "
852            "of the buffer!");
853 
854     // Location to use for error messages.
855     const char *UsedPrefixStart = UsedPrefix.data();
856 
857     // Skip the buffer to the end.
858     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
859 
860     // Complain about useful-looking but unsupported suffixes.
861     if (CheckTy == Check::CheckBadNot) {
862       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
863                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
864       return true;
865     }
866 
867     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
868     // leading whitespace.
869     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
870 
871     // Scan ahead to the end of line.
872     size_t EOL = Buffer.find_first_of("\n\r");
873 
874     // Remember the location of the start of the pattern, for diagnostics.
875     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
876 
877     // Parse the pattern.
878     Pattern P(CheckTy);
879     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
880       return true;
881 
882     // Verify that CHECK-LABEL lines do not define or use variables
883     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
884       SM.PrintMessage(
885           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
886           "found '" + UsedPrefix + "-LABEL:'"
887                                    " with variable definition or use");
888       return true;
889     }
890 
891     Buffer = Buffer.substr(EOL);
892 
893     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
894     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
895         CheckStrings.empty()) {
896       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
897       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
898                       SourceMgr::DK_Error,
899                       "found '" + UsedPrefix + "-" + Type +
900                           "' without previous '" + UsedPrefix + ": line");
901       return true;
902     }
903 
904     // Handle CHECK-DAG/-NOT.
905     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
906       DagNotMatches.push_back(P);
907       continue;
908     }
909 
910     // Okay, add the string we captured to the output vector and move on.
911     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
912     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
913     DagNotMatches = ImplicitNegativeChecks;
914   }
915 
916   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
917   // prefix as a filler for the error message.
918   if (!DagNotMatches.empty()) {
919     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
920                               SMLoc::getFromPointer(Buffer.data()));
921     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
922   }
923 
924   if (CheckStrings.empty()) {
925     errs() << "error: no check strings found with prefix"
926            << (CheckPrefixes.size() > 1 ? "es " : " ");
927     prefix_iterator I = CheckPrefixes.begin();
928     prefix_iterator E = CheckPrefixes.end();
929     if (I != E) {
930       errs() << "\'" << *I << ":'";
931       ++I;
932     }
933     for (; I != E; ++I)
934       errs() << ", \'" << *I << ":'";
935 
936     errs() << '\n';
937     return true;
938   }
939 
940   return false;
941 }
942 
943 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
944                              StringRef Buffer,
945                              StringMap<StringRef> &VariableTable) {
946   // Otherwise, we have an error, emit an error message.
947   SM.PrintMessage(Loc, SourceMgr::DK_Error,
948                   "expected string not found in input");
949 
950   // Print the "scanning from here" line.  If the current position is at the
951   // end of a line, advance to the start of the next line.
952   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
953 
954   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
955                   "scanning from here");
956 
957   // Allow the pattern to print additional information if desired.
958   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
959 }
960 
961 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
962                              StringRef Buffer,
963                              StringMap<StringRef> &VariableTable) {
964   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
965 }
966 
967 /// Count the number of newlines in the specified range.
968 static unsigned CountNumNewlinesBetween(StringRef Range,
969                                         const char *&FirstNewLine) {
970   unsigned NumNewLines = 0;
971   while (1) {
972     // Scan for newline.
973     Range = Range.substr(Range.find_first_of("\n\r"));
974     if (Range.empty())
975       return NumNewLines;
976 
977     ++NumNewLines;
978 
979     // Handle \n\r and \r\n as a single newline.
980     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
981         (Range[0] != Range[1]))
982       Range = Range.substr(1);
983     Range = Range.substr(1);
984 
985     if (NumNewLines == 1)
986       FirstNewLine = Range.begin();
987   }
988 }
989 
990 /// Match check string and its "not strings" and/or "dag strings".
991 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
992                           bool IsLabelScanMode, size_t &MatchLen,
993                           StringMap<StringRef> &VariableTable) const {
994   size_t LastPos = 0;
995   std::vector<const Pattern *> NotStrings;
996 
997   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
998   // bounds; we have not processed variable definitions within the bounded block
999   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1000   // over the block again (including the last CHECK-LABEL) in normal mode.
1001   if (!IsLabelScanMode) {
1002     // Match "dag strings" (with mixed "not strings" if any).
1003     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1004     if (LastPos == StringRef::npos)
1005       return StringRef::npos;
1006   }
1007 
1008   // Match itself from the last position after matching CHECK-DAG.
1009   StringRef MatchBuffer = Buffer.substr(LastPos);
1010   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1011   if (MatchPos == StringRef::npos) {
1012     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1013     return StringRef::npos;
1014   }
1015 
1016   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1017   // or CHECK-NOT
1018   if (!IsLabelScanMode) {
1019     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1020 
1021     // If this check is a "CHECK-NEXT", verify that the previous match was on
1022     // the previous line (i.e. that there is one newline between them).
1023     if (CheckNext(SM, SkippedRegion))
1024       return StringRef::npos;
1025 
1026     // If this check is a "CHECK-SAME", verify that the previous match was on
1027     // the same line (i.e. that there is no newline between them).
1028     if (CheckSame(SM, SkippedRegion))
1029       return StringRef::npos;
1030 
1031     // If this match had "not strings", verify that they don't exist in the
1032     // skipped region.
1033     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1034       return StringRef::npos;
1035   }
1036 
1037   return LastPos + MatchPos;
1038 }
1039 
1040 /// Verify there is a single line in the given buffer.
1041 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1042   if (Pat.getCheckTy() != Check::CheckNext)
1043     return false;
1044 
1045   // Count the number of newlines between the previous match and this one.
1046   assert(Buffer.data() !=
1047              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1048                                     SMLoc::getFromPointer(Buffer.data())))
1049                  ->getBufferStart() &&
1050          "CHECK-NEXT can't be the first check in a file");
1051 
1052   const char *FirstNewLine = nullptr;
1053   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1054 
1055   if (NumNewLines == 0) {
1056     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1057                     Prefix + "-NEXT: is on the same line as previous match");
1058     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1059                     "'next' match was here");
1060     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1061                     "previous match ended here");
1062     return true;
1063   }
1064 
1065   if (NumNewLines != 1) {
1066     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1067                     Prefix +
1068                         "-NEXT: is not on the line after the previous match");
1069     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1070                     "'next' match was here");
1071     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1072                     "previous match ended here");
1073     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1074                     "non-matching line after previous match is here");
1075     return true;
1076   }
1077 
1078   return false;
1079 }
1080 
1081 /// Verify there is no newline in the given buffer.
1082 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1083   if (Pat.getCheckTy() != Check::CheckSame)
1084     return false;
1085 
1086   // Count the number of newlines between the previous match and this one.
1087   assert(Buffer.data() !=
1088              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1089                                     SMLoc::getFromPointer(Buffer.data())))
1090                  ->getBufferStart() &&
1091          "CHECK-SAME can't be the first check in a file");
1092 
1093   const char *FirstNewLine = nullptr;
1094   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1095 
1096   if (NumNewLines != 0) {
1097     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1098                     Prefix +
1099                         "-SAME: is not on the same line as the previous match");
1100     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1101                     "'next' match was here");
1102     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1103                     "previous match ended here");
1104     return true;
1105   }
1106 
1107   return false;
1108 }
1109 
1110 /// Verify there's no "not strings" in the given buffer.
1111 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1112                            const std::vector<const Pattern *> &NotStrings,
1113                            StringMap<StringRef> &VariableTable) const {
1114   for (const Pattern *Pat : NotStrings) {
1115     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1116 
1117     size_t MatchLen = 0;
1118     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1119 
1120     if (Pos == StringRef::npos)
1121       continue;
1122 
1123     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1124                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1125     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1126                     Prefix + "-NOT: pattern specified here");
1127     return true;
1128   }
1129 
1130   return false;
1131 }
1132 
1133 /// Match "dag strings" and their mixed "not strings".
1134 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1135                              std::vector<const Pattern *> &NotStrings,
1136                              StringMap<StringRef> &VariableTable) const {
1137   if (DagNotStrings.empty())
1138     return 0;
1139 
1140   size_t LastPos = 0;
1141   size_t StartPos = LastPos;
1142 
1143   for (const Pattern &Pat : DagNotStrings) {
1144     assert((Pat.getCheckTy() == Check::CheckDAG ||
1145             Pat.getCheckTy() == Check::CheckNot) &&
1146            "Invalid CHECK-DAG or CHECK-NOT!");
1147 
1148     if (Pat.getCheckTy() == Check::CheckNot) {
1149       NotStrings.push_back(&Pat);
1150       continue;
1151     }
1152 
1153     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1154 
1155     size_t MatchLen = 0, MatchPos;
1156 
1157     // CHECK-DAG always matches from the start.
1158     StringRef MatchBuffer = Buffer.substr(StartPos);
1159     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1160     // With a group of CHECK-DAGs, a single mismatching means the match on
1161     // that group of CHECK-DAGs fails immediately.
1162     if (MatchPos == StringRef::npos) {
1163       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1164       return StringRef::npos;
1165     }
1166     // Re-calc it as the offset relative to the start of the original string.
1167     MatchPos += StartPos;
1168 
1169     if (!NotStrings.empty()) {
1170       if (MatchPos < LastPos) {
1171         // Reordered?
1172         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1173                         SourceMgr::DK_Error,
1174                         Prefix + "-DAG: found a match of CHECK-DAG"
1175                                  " reordering across a CHECK-NOT");
1176         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1177                         SourceMgr::DK_Note,
1178                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1179                                  " is found here");
1180         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1181                         Prefix + "-NOT: the crossed pattern specified"
1182                                  " here");
1183         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1184                         Prefix + "-DAG: the reordered pattern specified"
1185                                  " here");
1186         return StringRef::npos;
1187       }
1188       // All subsequent CHECK-DAGs should be matched from the farthest
1189       // position of all precedent CHECK-DAGs (including this one.)
1190       StartPos = LastPos;
1191       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1192       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1193       // region.
1194       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1195       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1196         return StringRef::npos;
1197       // Clear "not strings".
1198       NotStrings.clear();
1199     }
1200 
1201     // Update the last position with CHECK-DAG matches.
1202     LastPos = std::max(MatchPos + MatchLen, LastPos);
1203   }
1204 
1205   return LastPos;
1206 }
1207 
1208 // A check prefix must contain only alphanumeric, hyphens and underscores.
1209 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1210   Regex Validator("^[a-zA-Z0-9_-]*$");
1211   return Validator.match(CheckPrefix);
1212 }
1213 
1214 static bool ValidateCheckPrefixes() {
1215   StringSet<> PrefixSet;
1216 
1217   for (StringRef Prefix : CheckPrefixes) {
1218     // Reject empty prefixes.
1219     if (Prefix == "")
1220       return false;
1221 
1222     if (!PrefixSet.insert(Prefix).second)
1223       return false;
1224 
1225     if (!ValidateCheckPrefix(Prefix))
1226       return false;
1227   }
1228 
1229   return true;
1230 }
1231 
1232 // Combines the check prefixes into a single regex so that we can efficiently
1233 // scan for any of the set.
1234 //
1235 // The semantics are that the longest-match wins which matches our regex
1236 // library.
1237 static Regex buildCheckPrefixRegex() {
1238   // I don't think there's a way to specify an initial value for cl::list,
1239   // so if nothing was specified, add the default
1240   if (CheckPrefixes.empty())
1241     CheckPrefixes.push_back("CHECK");
1242 
1243   // We already validated the contents of CheckPrefixes so just concatenate
1244   // them as alternatives.
1245   SmallString<32> PrefixRegexStr;
1246   for (StringRef Prefix : CheckPrefixes) {
1247     if (Prefix != CheckPrefixes.front())
1248       PrefixRegexStr.push_back('|');
1249 
1250     PrefixRegexStr.append(Prefix);
1251   }
1252 
1253   return Regex(PrefixRegexStr);
1254 }
1255 
1256 static void DumpCommandLine(int argc, char **argv) {
1257   errs() << "FileCheck command line: ";
1258   for (int I = 0; I < argc; I++)
1259     errs() << " " << argv[I];
1260   errs() << "\n";
1261 }
1262 
1263 /// Check the input to FileCheck provided in the \p Buffer against the \p
1264 /// CheckStrings read from the check file.
1265 ///
1266 /// Returns false if the input fails to satisfy the checks.
1267 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1268                 ArrayRef<CheckString> CheckStrings) {
1269   bool ChecksFailed = false;
1270 
1271   /// VariableTable - This holds all the current filecheck variables.
1272   StringMap<StringRef> VariableTable;
1273 
1274   unsigned i = 0, j = 0, e = CheckStrings.size();
1275   while (true) {
1276     StringRef CheckRegion;
1277     if (j == e) {
1278       CheckRegion = Buffer;
1279     } else {
1280       const CheckString &CheckLabelStr = CheckStrings[j];
1281       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1282         ++j;
1283         continue;
1284       }
1285 
1286       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1287       size_t MatchLabelLen = 0;
1288       size_t MatchLabelPos =
1289           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1290       if (MatchLabelPos == StringRef::npos)
1291         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1292         return false;
1293 
1294       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1295       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1296       ++j;
1297     }
1298 
1299     for (; i != j; ++i) {
1300       const CheckString &CheckStr = CheckStrings[i];
1301 
1302       // Check each string within the scanned region, including a second check
1303       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1304       size_t MatchLen = 0;
1305       size_t MatchPos =
1306           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1307 
1308       if (MatchPos == StringRef::npos) {
1309         ChecksFailed = true;
1310         i = j;
1311         break;
1312       }
1313 
1314       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1315     }
1316 
1317     if (j == e)
1318       break;
1319   }
1320 
1321   // Success if no checks failed.
1322   return !ChecksFailed;
1323 }
1324 
1325 int main(int argc, char **argv) {
1326   sys::PrintStackTraceOnErrorSignal(argv[0]);
1327   PrettyStackTraceProgram X(argc, argv);
1328   cl::ParseCommandLineOptions(argc, argv);
1329 
1330   if (!ValidateCheckPrefixes()) {
1331     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1332               "start with a letter and contain only alphanumeric characters, "
1333               "hyphens and underscores\n";
1334     return 2;
1335   }
1336 
1337   Regex PrefixRE = buildCheckPrefixRegex();
1338   std::string REError;
1339   if (!PrefixRE.isValid(REError)) {
1340     errs() << "Unable to combine check-prefix strings into a prefix regular "
1341               "expression! This is likely a bug in FileCheck's verification of "
1342               "the check-prefix strings. Regular expression parsing failed "
1343               "with the following error: "
1344            << REError << "\n";
1345     return 2;
1346   }
1347 
1348   SourceMgr SM;
1349 
1350   // Read the expected strings from the check file.
1351   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1352       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1353   if (std::error_code EC = CheckFileOrErr.getError()) {
1354     errs() << "Could not open check file '" << CheckFilename
1355            << "': " << EC.message() << '\n';
1356     return 2;
1357   }
1358   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1359 
1360   SmallString<4096> CheckFileBuffer;
1361   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1362 
1363   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1364                             CheckFileText, CheckFile.getBufferIdentifier()),
1365                         SMLoc());
1366 
1367   std::vector<CheckString> CheckStrings;
1368   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1369     return 2;
1370 
1371   // Open the file to check and add it to SourceMgr.
1372   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1373       MemoryBuffer::getFileOrSTDIN(InputFilename);
1374   if (std::error_code EC = InputFileOrErr.getError()) {
1375     errs() << "Could not open input file '" << InputFilename
1376            << "': " << EC.message() << '\n';
1377     return 2;
1378   }
1379   MemoryBuffer &InputFile = *InputFileOrErr.get();
1380 
1381   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1382     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1383     DumpCommandLine(argc, argv);
1384     return 2;
1385   }
1386 
1387   SmallString<4096> InputFileBuffer;
1388   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1389 
1390   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1391                             InputFileText, InputFile.getBufferIdentifier()),
1392                         SMLoc());
1393 
1394   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1395 }
1396