1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a glob pattern matcher. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/GlobPattern.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/Support/Errc.h" 17 18 using namespace llvm; 19 20 // Expands character ranges and returns a bitmap. 21 // For example, "a-cf-hz" is expanded to "abcfghz". 22 static Expected<BitVector> expand(StringRef S, StringRef Original) { 23 BitVector BV(256, false); 24 25 // Expand X-Y. 26 for (;;) { 27 if (S.size() < 3) 28 break; 29 30 uint8_t Start = S[0]; 31 uint8_t End = S[2]; 32 33 // If it doesn't start with something like X-Y, 34 // consume the first character and proceed. 35 if (S[1] != '-') { 36 BV[Start] = true; 37 S = S.substr(1); 38 continue; 39 } 40 41 // It must be in the form of X-Y. 42 // Validate it and then interpret the range. 43 if (Start > End) 44 return make_error<StringError>("invalid glob pattern: " + Original, 45 errc::invalid_argument); 46 47 for (int C = Start; C <= End; ++C) 48 BV[(uint8_t)C] = true; 49 S = S.substr(3); 50 } 51 52 for (char C : S) 53 BV[(uint8_t)C] = true; 54 return BV; 55 } 56 57 Expected<GlobPattern> GlobPattern::create(StringRef S) { 58 GlobPattern Pat; 59 60 // Store the prefix that does not contain any metacharacter. 61 size_t PrefixSize = S.find_first_of("?*[\\"); 62 Pat.Prefix = S.substr(0, PrefixSize); 63 if (PrefixSize == std::string::npos) 64 return Pat; 65 StringRef Original = S; 66 S = S.substr(PrefixSize); 67 68 // Parse brackets. 69 Pat.Pat = S; 70 for (size_t I = 0, E = S.size(); I != E; ++I) { 71 if (S[I] == '[') { 72 // ']' is allowed as the first character of a character class. '[]' is 73 // invalid. So, just skip the first character. 74 ++I; 75 size_t J = S.find(']', I + 1); 76 if (J == StringRef::npos) 77 return make_error<StringError>("invalid glob pattern: " + Original, 78 errc::invalid_argument); 79 StringRef Chars = S.substr(I, J - I); 80 bool Invert = S[I] == '^' || S[I] == '!'; 81 Expected<BitVector> BV = 82 Invert ? expand(Chars.substr(1), S) : expand(Chars, S); 83 if (!BV) 84 return BV.takeError(); 85 if (Invert) 86 BV->flip(); 87 Pat.Brackets.push_back(Bracket{S.data() + J + 1, std::move(*BV)}); 88 I = J; 89 } else if (S[I] == '\\') { 90 if (++I == E) 91 return make_error<StringError>("invalid glob pattern, stray '\\'", 92 errc::invalid_argument); 93 } 94 } 95 return Pat; 96 } 97 98 bool GlobPattern::match(StringRef S) const { 99 return S.consume_front(Prefix) && matchOne(S); 100 } 101 102 // Factor the pattern into segments split by '*'. The segment is matched 103 // sequentianlly by finding the first occurrence past the end of the previous 104 // match. 105 bool GlobPattern::matchOne(StringRef Str) const { 106 const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(), 107 *SavedS = S; 108 const char *const PEnd = P + Pat.size(), *const End = S + Str.size(); 109 size_t B = 0, SavedB = 0; 110 while (S != End) { 111 if (P == PEnd) 112 ; 113 else if (*P == '*') { 114 // The non-* substring on the left of '*' matches the tail of S. Save the 115 // positions to be used by backtracking if we see a mismatch later. 116 SegmentBegin = ++P; 117 SavedS = S; 118 SavedB = B; 119 continue; 120 } else if (*P == '[') { 121 if (Brackets[B].Bytes[uint8_t(*S)]) { 122 P = Brackets[B++].Next; 123 ++S; 124 continue; 125 } 126 } else if (*P == '\\') { 127 if (*++P == *S) { 128 ++P; 129 ++S; 130 continue; 131 } 132 } else if (*P == *S || *P == '?') { 133 ++P; 134 ++S; 135 continue; 136 } 137 if (!SegmentBegin) 138 return false; 139 // We have seen a '*'. Backtrack to the saved positions. Shift the S 140 // position to probe the next starting position in the segment. 141 P = SegmentBegin; 142 S = ++SavedS; 143 B = SavedB; 144 } 145 // All bytes in Str have been matched. Return true if the rest part of Pat is 146 // empty or contains only '*'. 147 return Pat.find_first_not_of('*', P - Pat.data()) == std::string::npos; 148 } 149