18f9803b5SOwen Pan //===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===// 28f9803b5SOwen Pan // 38f9803b5SOwen Pan // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48f9803b5SOwen Pan // See https://llvm.org/LICENSE.txt for license information. 58f9803b5SOwen Pan // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68f9803b5SOwen Pan // 78f9803b5SOwen Pan //===----------------------------------------------------------------------===// 88f9803b5SOwen Pan /// 98f9803b5SOwen Pan /// \file 108f9803b5SOwen Pan /// This file implements the functionality of matching a file path name to 118f9803b5SOwen Pan /// a pattern, similar to the POSIX fnmatch() function. 128f9803b5SOwen Pan /// 138f9803b5SOwen Pan //===----------------------------------------------------------------------===// 148f9803b5SOwen Pan 158f9803b5SOwen Pan #include "MatchFilePath.h" 168f9803b5SOwen Pan 178f9803b5SOwen Pan using namespace llvm; 188f9803b5SOwen Pan 198f9803b5SOwen Pan namespace clang { 208f9803b5SOwen Pan namespace format { 218f9803b5SOwen Pan 22ca8441d6SOwen Pan // Check whether `FilePath` matches `Pattern` based on POSIX 2.13.1, 2.13.2, and 23ca8441d6SOwen Pan // Rule 1 of 2.13.3. 248f9803b5SOwen Pan bool matchFilePath(StringRef Pattern, StringRef FilePath) { 258f9803b5SOwen Pan assert(!Pattern.empty()); 268f9803b5SOwen Pan assert(!FilePath.empty()); 278f9803b5SOwen Pan 28cd239493SOwen Pan const auto FilePathBack = FilePath.back(); 29cd239493SOwen Pan 308f9803b5SOwen Pan // No match if `Pattern` ends with a non-meta character not equal to the last 318f9803b5SOwen Pan // character of `FilePath`. 32cd239493SOwen Pan if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePathBack) 338f9803b5SOwen Pan return false; 348f9803b5SOwen Pan 358f9803b5SOwen Pan constexpr auto Separator = '/'; 368f9803b5SOwen Pan const auto EOP = Pattern.size(); // End of `Pattern`. 378f9803b5SOwen Pan const auto End = FilePath.size(); // End of `FilePath`. 388f9803b5SOwen Pan unsigned I = 0; // Index to `Pattern`. 398f9803b5SOwen Pan 408f9803b5SOwen Pan for (unsigned J = 0; J < End; ++J) { 418f9803b5SOwen Pan if (I == EOP) 428f9803b5SOwen Pan return false; 438f9803b5SOwen Pan 448f9803b5SOwen Pan switch (const auto F = FilePath[J]; Pattern[I]) { 458f9803b5SOwen Pan case '\\': 468f9803b5SOwen Pan if (++I == EOP || F != Pattern[I]) 478f9803b5SOwen Pan return false; 488f9803b5SOwen Pan break; 498f9803b5SOwen Pan case '?': 508f9803b5SOwen Pan if (F == Separator) 518f9803b5SOwen Pan return false; 528f9803b5SOwen Pan break; 538f9803b5SOwen Pan case '*': { 54cd239493SOwen Pan bool Globstar = I == 0 || Pattern[I - 1] == Separator; 55cd239493SOwen Pan int StarCount = 1; 56cd239493SOwen Pan for (; ++I < EOP && Pattern[I] == '*'; ++StarCount) { 57cd239493SOwen Pan // Skip consecutive stars. 588f9803b5SOwen Pan } 59cd239493SOwen Pan if (StarCount != 2) 60cd239493SOwen Pan Globstar = false; 618f9803b5SOwen Pan const auto K = FilePath.find(Separator, J); // Index of next `Separator`. 628f9803b5SOwen Pan const bool NoMoreSeparatorsInFilePath = K == StringRef::npos; 638f9803b5SOwen Pan if (I == EOP) // `Pattern` ends with a star. 64cd239493SOwen Pan return Globstar || NoMoreSeparatorsInFilePath; 65cd239493SOwen Pan if (Pattern[I] != Separator) { 668f9803b5SOwen Pan // `Pattern` ends with a lone backslash. 678f9803b5SOwen Pan if (Pattern[I] == '\\' && ++I == EOP) 688f9803b5SOwen Pan return false; 69*064da423SOwen Pan Globstar = false; 70cd239493SOwen Pan } 718f9803b5SOwen Pan // The star is followed by a (possibly escaped) `Separator`. 728f9803b5SOwen Pan if (Pattern[I] == Separator) { 73cd239493SOwen Pan if (!Globstar) { 748f9803b5SOwen Pan if (NoMoreSeparatorsInFilePath) 758f9803b5SOwen Pan return false; 768f9803b5SOwen Pan J = K; // Skip to next `Separator` in `FilePath`. 778f9803b5SOwen Pan break; 788f9803b5SOwen Pan } 79cd239493SOwen Pan if (++I == EOP) 80cd239493SOwen Pan return FilePathBack == Separator; 81cd239493SOwen Pan } 828f9803b5SOwen Pan // Recurse. 83cd239493SOwen Pan for (auto Pat = Pattern.substr(I); 84cd239493SOwen Pan J < End && (Globstar || FilePath[J] != Separator); ++J) { 858f9803b5SOwen Pan if (matchFilePath(Pat, FilePath.substr(J))) 868f9803b5SOwen Pan return true; 878f9803b5SOwen Pan } 888f9803b5SOwen Pan return false; 898f9803b5SOwen Pan } 908f9803b5SOwen Pan case '[': 918f9803b5SOwen Pan // Skip e.g. `[!]`. 928f9803b5SOwen Pan if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) { 938f9803b5SOwen Pan // Skip unpaired `[`, brackets containing slashes, and `[]`. 948f9803b5SOwen Pan if (const auto K = Pattern.find_first_of("]/", I + 1); 958f9803b5SOwen Pan K != StringRef::npos && Pattern[K] == ']' && K > I + 1) { 968f9803b5SOwen Pan if (F == Separator) 978f9803b5SOwen Pan return false; 988f9803b5SOwen Pan ++I; // After the `[`. 998f9803b5SOwen Pan bool Negated = false; 1008f9803b5SOwen Pan if (Pattern[I] == '!') { 1018f9803b5SOwen Pan Negated = true; 1028f9803b5SOwen Pan ++I; // After the `!`. 1038f9803b5SOwen Pan } 1048f9803b5SOwen Pan bool Match = false; 1058f9803b5SOwen Pan do { 1068f9803b5SOwen Pan if (I + 2 < K && Pattern[I + 1] == '-') { 1078f9803b5SOwen Pan Match = Pattern[I] <= F && F <= Pattern[I + 2]; 1088f9803b5SOwen Pan I += 3; // After the range, e.g. `A-Z`. 1098f9803b5SOwen Pan } else { 1108f9803b5SOwen Pan Match = F == Pattern[I++]; 1118f9803b5SOwen Pan } 1128f9803b5SOwen Pan } while (!Match && I < K); 1138f9803b5SOwen Pan if (Negated ? Match : !Match) 1148f9803b5SOwen Pan return false; 1158f9803b5SOwen Pan I = K + 1; // After the `]`. 1168f9803b5SOwen Pan continue; 1178f9803b5SOwen Pan } 1188f9803b5SOwen Pan } 1198f9803b5SOwen Pan [[fallthrough]]; // Match `[` literally. 1208f9803b5SOwen Pan default: 1218f9803b5SOwen Pan if (F != Pattern[I]) 1228f9803b5SOwen Pan return false; 1238f9803b5SOwen Pan } 1248f9803b5SOwen Pan 1258f9803b5SOwen Pan ++I; 1268f9803b5SOwen Pan } 1278f9803b5SOwen Pan 1288f9803b5SOwen Pan // Match trailing stars with null strings. 1298f9803b5SOwen Pan while (I < EOP && Pattern[I] == '*') 1308f9803b5SOwen Pan ++I; 1318f9803b5SOwen Pan 1328f9803b5SOwen Pan return I == EOP; 1338f9803b5SOwen Pan } 1348f9803b5SOwen Pan 1358f9803b5SOwen Pan } // namespace format 1368f9803b5SOwen Pan } // namespace clang 137