xref: /llvm-project/llvm/lib/Support/GlobPattern.cpp (revision 4553dc46a05ec6f1e2aebcde1ce185772a26780b)
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a glob pattern matcher.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/GlobPattern.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Support/Errc.h"
17 
18 using namespace llvm;
19 
20 // Expands character ranges and returns a bitmap.
21 // For example, "a-cf-hz" is expanded to "abcfghz".
22 static Expected<BitVector> expand(StringRef S, StringRef Original) {
23   BitVector BV(256, false);
24 
25   // Expand X-Y.
26   for (;;) {
27     if (S.size() < 3)
28       break;
29 
30     uint8_t Start = S[0];
31     uint8_t End = S[2];
32 
33     // If it doesn't start with something like X-Y,
34     // consume the first character and proceed.
35     if (S[1] != '-') {
36       BV[Start] = true;
37       S = S.substr(1);
38       continue;
39     }
40 
41     // It must be in the form of X-Y.
42     // Validate it and then interpret the range.
43     if (Start > End)
44       return make_error<StringError>("invalid glob pattern: " + Original,
45                                      errc::invalid_argument);
46 
47     for (int C = Start; C <= End; ++C)
48       BV[(uint8_t)C] = true;
49     S = S.substr(3);
50   }
51 
52   for (char C : S)
53     BV[(uint8_t)C] = true;
54   return BV;
55 }
56 
57 Expected<GlobPattern> GlobPattern::create(StringRef S) {
58   GlobPattern Pat;
59 
60   // Store the prefix that does not contain any metacharacter.
61   size_t PrefixSize = S.find_first_of("?*[\\");
62   Pat.Prefix = S.substr(0, PrefixSize);
63   if (PrefixSize == std::string::npos)
64     return Pat;
65   StringRef Original = S;
66   S = S.substr(PrefixSize);
67 
68   // Parse brackets.
69   Pat.Pat = S;
70   for (size_t I = 0, E = S.size(); I != E; ++I) {
71     if (S[I] == '[') {
72       // ']' is allowed as the first character of a character class. '[]' is
73       // invalid. So, just skip the first character.
74       ++I;
75       size_t J = S.find(']', I + 1);
76       if (J == StringRef::npos)
77         return make_error<StringError>("invalid glob pattern: " + Original,
78                                        errc::invalid_argument);
79       StringRef Chars = S.substr(I, J - I);
80       bool Invert = S[I] == '^' || S[I] == '!';
81       Expected<BitVector> BV =
82           Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
83       if (!BV)
84         return BV.takeError();
85       if (Invert)
86         BV->flip();
87       Pat.Brackets.push_back(Bracket{S.data() + J + 1, std::move(*BV)});
88       I = J;
89     } else if (S[I] == '\\') {
90       if (++I == E)
91         return make_error<StringError>("invalid glob pattern, stray '\\'",
92                                        errc::invalid_argument);
93     }
94   }
95   return Pat;
96 }
97 
98 bool GlobPattern::match(StringRef S) const {
99   return S.consume_front(Prefix) && matchOne(S);
100 }
101 
102 // Factor the pattern into segments split by '*'. The segment is matched
103 // sequentianlly by finding the first occurrence past the end of the previous
104 // match.
105 bool GlobPattern::matchOne(StringRef Str) const {
106   const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
107              *SavedS = S;
108   const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
109   size_t B = 0, SavedB = 0;
110   while (S != End) {
111     if (P == PEnd)
112       ;
113     else if (*P == '*') {
114       // The non-* substring on the left of '*' matches the tail of S. Save the
115       // positions to be used by backtracking if we see a mismatch later.
116       SegmentBegin = ++P;
117       SavedS = S;
118       SavedB = B;
119       continue;
120     } else if (*P == '[') {
121       if (Brackets[B].Bytes[uint8_t(*S)]) {
122         P = Brackets[B++].Next;
123         ++S;
124         continue;
125       }
126     } else if (*P == '\\') {
127       if (*++P == *S) {
128         ++P;
129         ++S;
130         continue;
131       }
132     } else if (*P == *S || *P == '?') {
133       ++P;
134       ++S;
135       continue;
136     }
137     if (!SegmentBegin)
138       return false;
139     // We have seen a '*'. Backtrack to the saved positions. Shift the S
140     // position to probe the next starting position in the segment.
141     P = SegmentBegin;
142     S = ++SavedS;
143     B = SavedB;
144   }
145   // All bytes in Str have been matched. Return true if the rest part of Pat is
146   // empty or contains only '*'.
147   return Pat.find_first_not_of('*', P - Pat.data()) == std::string::npos;
148 }
149