1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a glob pattern matcher. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/GlobPattern.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/Optional.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Support/Errc.h" 19 20 using namespace llvm; 21 22 static bool hasWildcard(StringRef S) { 23 return S.find_first_of("?*[") != StringRef::npos; 24 } 25 26 // Expands character ranges and returns a bitmap. 27 // For example, "a-cf-hz" is expanded to "abcfghz". 28 static Expected<BitVector> expand(StringRef S, StringRef Original) { 29 BitVector BV(256, false); 30 31 // Expand X-Y. 32 for (;;) { 33 if (S.size() < 3) 34 break; 35 36 // If it doesn't start with something like X-Y, 37 // consume the first character and proceed. 38 if (S[1] != '-') { 39 BV[S[0]] = true; 40 S = S.substr(1); 41 continue; 42 } 43 44 // It must be in the form of X-Y. 45 // Validate it and then interpret the range. 46 if (S[0] > S[2]) 47 return make_error<StringError>("invalid glob pattern: " + Original, 48 errc::invalid_argument); 49 50 for (int C = S[0]; C <= S[2]; ++C) 51 BV[C] = true; 52 S = S.substr(3); 53 } 54 55 for (char C : S) 56 BV[C] = true; 57 return BV; 58 } 59 60 // This is a scanner for the glob pattern. 61 // A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]" 62 // (which is a negative form of "[<chars>]"), or a non-meta character. 63 // This function returns the first token in S. 64 static Expected<BitVector> scan(StringRef &S, StringRef Original) { 65 switch (S[0]) { 66 case '*': 67 S = S.substr(1); 68 // '*' is represented by an empty bitvector. 69 // All other bitvectors are 256-bit long. 70 return BitVector(); 71 case '?': 72 S = S.substr(1); 73 return BitVector(256, true); 74 case '[': { 75 size_t End = S.find(']', 1); 76 if (End == StringRef::npos) 77 return make_error<StringError>("invalid glob pattern: " + Original, 78 errc::invalid_argument); 79 80 StringRef Chars = S.substr(1, End - 1); 81 S = S.substr(End + 1); 82 if (Chars.startswith("^")) { 83 Expected<BitVector> BV = expand(Chars.substr(1), Original); 84 if (!BV) 85 return BV.takeError(); 86 return BV->flip(); 87 } 88 return expand(Chars, Original); 89 } 90 default: 91 BitVector BV(256, false); 92 BV[S[0]] = true; 93 S = S.substr(1); 94 return BV; 95 } 96 } 97 98 Expected<GlobPattern> GlobPattern::create(StringRef S) { 99 GlobPattern Pat; 100 101 // S doesn't contain any metacharacter, 102 // so the regular string comparison should work. 103 if (!hasWildcard(S)) { 104 Pat.Exact = S; 105 return Pat; 106 } 107 108 // S is something like "foo*". We can use startswith(). 109 if (S.endswith("*") && !hasWildcard(S.drop_back())) { 110 Pat.Prefix = S.drop_back(); 111 return Pat; 112 } 113 114 // S is something like "*foo". We can use endswith(). 115 if (S.startswith("*") && !hasWildcard(S.drop_front())) { 116 Pat.Suffix = S.drop_front(); 117 return Pat; 118 } 119 120 // Otherwise, we need to do real glob pattern matching. 121 // Parse the pattern now. 122 StringRef Original = S; 123 while (!S.empty()) { 124 Expected<BitVector> BV = scan(S, Original); 125 if (!BV) 126 return BV.takeError(); 127 Pat.Tokens.push_back(*BV); 128 } 129 return Pat; 130 } 131 132 bool GlobPattern::match(StringRef S) const { 133 if (Exact) 134 return S == *Exact; 135 if (Prefix) 136 return S.startswith(*Prefix); 137 if (Suffix) 138 return S.endswith(*Suffix); 139 return matchOne(Tokens, S); 140 } 141 142 // Runs glob pattern Pats against string S. 143 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { 144 for (;;) { 145 if (Pats.empty()) 146 return S.empty(); 147 148 // If Pats[0] is '*', try to match Pats[1..] against all possible 149 // tail strings of S to see at least one pattern succeeds. 150 if (Pats[0].size() == 0) { 151 Pats = Pats.slice(1); 152 if (Pats.empty()) 153 // Fast path. If a pattern is '*', it matches anything. 154 return true; 155 for (size_t I = 0, E = S.size(); I < E; ++I) 156 if (matchOne(Pats, S.substr(I))) 157 return true; 158 return false; 159 } 160 161 // If Pats[0] is not '*', it must consume one character. 162 if (S.empty() || !Pats[0][S[0]]) 163 return false; 164 Pats = Pats.slice(1); 165 S = S.substr(1); 166 } 167 } 168