xref: /llvm-project/clang/lib/Format/MatchFilePath.cpp (revision 064da423c3b46907f5011a4537a88fbae9ac03d4)
18f9803b5SOwen Pan //===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
28f9803b5SOwen Pan //
38f9803b5SOwen Pan // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48f9803b5SOwen Pan // See https://llvm.org/LICENSE.txt for license information.
58f9803b5SOwen Pan // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68f9803b5SOwen Pan //
78f9803b5SOwen Pan //===----------------------------------------------------------------------===//
88f9803b5SOwen Pan ///
98f9803b5SOwen Pan /// \file
108f9803b5SOwen Pan /// This file implements the functionality of matching a file path name to
118f9803b5SOwen Pan /// a pattern, similar to the POSIX fnmatch() function.
128f9803b5SOwen Pan ///
138f9803b5SOwen Pan //===----------------------------------------------------------------------===//
148f9803b5SOwen Pan 
158f9803b5SOwen Pan #include "MatchFilePath.h"
168f9803b5SOwen Pan 
178f9803b5SOwen Pan using namespace llvm;
188f9803b5SOwen Pan 
198f9803b5SOwen Pan namespace clang {
208f9803b5SOwen Pan namespace format {
218f9803b5SOwen Pan 
22ca8441d6SOwen Pan // Check whether `FilePath` matches `Pattern` based on POSIX 2.13.1, 2.13.2, and
23ca8441d6SOwen Pan // Rule 1 of 2.13.3.
248f9803b5SOwen Pan bool matchFilePath(StringRef Pattern, StringRef FilePath) {
258f9803b5SOwen Pan   assert(!Pattern.empty());
268f9803b5SOwen Pan   assert(!FilePath.empty());
278f9803b5SOwen Pan 
28cd239493SOwen Pan   const auto FilePathBack = FilePath.back();
29cd239493SOwen Pan 
308f9803b5SOwen Pan   // No match if `Pattern` ends with a non-meta character not equal to the last
318f9803b5SOwen Pan   // character of `FilePath`.
32cd239493SOwen Pan   if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePathBack)
338f9803b5SOwen Pan     return false;
348f9803b5SOwen Pan 
358f9803b5SOwen Pan   constexpr auto Separator = '/';
368f9803b5SOwen Pan   const auto EOP = Pattern.size();  // End of `Pattern`.
378f9803b5SOwen Pan   const auto End = FilePath.size(); // End of `FilePath`.
388f9803b5SOwen Pan   unsigned I = 0;                   // Index to `Pattern`.
398f9803b5SOwen Pan 
408f9803b5SOwen Pan   for (unsigned J = 0; J < End; ++J) {
418f9803b5SOwen Pan     if (I == EOP)
428f9803b5SOwen Pan       return false;
438f9803b5SOwen Pan 
448f9803b5SOwen Pan     switch (const auto F = FilePath[J]; Pattern[I]) {
458f9803b5SOwen Pan     case '\\':
468f9803b5SOwen Pan       if (++I == EOP || F != Pattern[I])
478f9803b5SOwen Pan         return false;
488f9803b5SOwen Pan       break;
498f9803b5SOwen Pan     case '?':
508f9803b5SOwen Pan       if (F == Separator)
518f9803b5SOwen Pan         return false;
528f9803b5SOwen Pan       break;
538f9803b5SOwen Pan     case '*': {
54cd239493SOwen Pan       bool Globstar = I == 0 || Pattern[I - 1] == Separator;
55cd239493SOwen Pan       int StarCount = 1;
56cd239493SOwen Pan       for (; ++I < EOP && Pattern[I] == '*'; ++StarCount) {
57cd239493SOwen Pan         // Skip consecutive stars.
588f9803b5SOwen Pan       }
59cd239493SOwen Pan       if (StarCount != 2)
60cd239493SOwen Pan         Globstar = false;
618f9803b5SOwen Pan       const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
628f9803b5SOwen Pan       const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
638f9803b5SOwen Pan       if (I == EOP) // `Pattern` ends with a star.
64cd239493SOwen Pan         return Globstar || NoMoreSeparatorsInFilePath;
65cd239493SOwen Pan       if (Pattern[I] != Separator) {
668f9803b5SOwen Pan         // `Pattern` ends with a lone backslash.
678f9803b5SOwen Pan         if (Pattern[I] == '\\' && ++I == EOP)
688f9803b5SOwen Pan           return false;
69*064da423SOwen Pan         Globstar = false;
70cd239493SOwen Pan       }
718f9803b5SOwen Pan       // The star is followed by a (possibly escaped) `Separator`.
728f9803b5SOwen Pan       if (Pattern[I] == Separator) {
73cd239493SOwen Pan         if (!Globstar) {
748f9803b5SOwen Pan           if (NoMoreSeparatorsInFilePath)
758f9803b5SOwen Pan             return false;
768f9803b5SOwen Pan           J = K; // Skip to next `Separator` in `FilePath`.
778f9803b5SOwen Pan           break;
788f9803b5SOwen Pan         }
79cd239493SOwen Pan         if (++I == EOP)
80cd239493SOwen Pan           return FilePathBack == Separator;
81cd239493SOwen Pan       }
828f9803b5SOwen Pan       // Recurse.
83cd239493SOwen Pan       for (auto Pat = Pattern.substr(I);
84cd239493SOwen Pan            J < End && (Globstar || FilePath[J] != Separator); ++J) {
858f9803b5SOwen Pan         if (matchFilePath(Pat, FilePath.substr(J)))
868f9803b5SOwen Pan           return true;
878f9803b5SOwen Pan       }
888f9803b5SOwen Pan       return false;
898f9803b5SOwen Pan     }
908f9803b5SOwen Pan     case '[':
918f9803b5SOwen Pan       // Skip e.g. `[!]`.
928f9803b5SOwen Pan       if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
938f9803b5SOwen Pan         // Skip unpaired `[`, brackets containing slashes, and `[]`.
948f9803b5SOwen Pan         if (const auto K = Pattern.find_first_of("]/", I + 1);
958f9803b5SOwen Pan             K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
968f9803b5SOwen Pan           if (F == Separator)
978f9803b5SOwen Pan             return false;
988f9803b5SOwen Pan           ++I; // After the `[`.
998f9803b5SOwen Pan           bool Negated = false;
1008f9803b5SOwen Pan           if (Pattern[I] == '!') {
1018f9803b5SOwen Pan             Negated = true;
1028f9803b5SOwen Pan             ++I; // After the `!`.
1038f9803b5SOwen Pan           }
1048f9803b5SOwen Pan           bool Match = false;
1058f9803b5SOwen Pan           do {
1068f9803b5SOwen Pan             if (I + 2 < K && Pattern[I + 1] == '-') {
1078f9803b5SOwen Pan               Match = Pattern[I] <= F && F <= Pattern[I + 2];
1088f9803b5SOwen Pan               I += 3; // After the range, e.g. `A-Z`.
1098f9803b5SOwen Pan             } else {
1108f9803b5SOwen Pan               Match = F == Pattern[I++];
1118f9803b5SOwen Pan             }
1128f9803b5SOwen Pan           } while (!Match && I < K);
1138f9803b5SOwen Pan           if (Negated ? Match : !Match)
1148f9803b5SOwen Pan             return false;
1158f9803b5SOwen Pan           I = K + 1; // After the `]`.
1168f9803b5SOwen Pan           continue;
1178f9803b5SOwen Pan         }
1188f9803b5SOwen Pan       }
1198f9803b5SOwen Pan       [[fallthrough]]; // Match `[` literally.
1208f9803b5SOwen Pan     default:
1218f9803b5SOwen Pan       if (F != Pattern[I])
1228f9803b5SOwen Pan         return false;
1238f9803b5SOwen Pan     }
1248f9803b5SOwen Pan 
1258f9803b5SOwen Pan     ++I;
1268f9803b5SOwen Pan   }
1278f9803b5SOwen Pan 
1288f9803b5SOwen Pan   // Match trailing stars with null strings.
1298f9803b5SOwen Pan   while (I < EOP && Pattern[I] == '*')
1308f9803b5SOwen Pan     ++I;
1318f9803b5SOwen Pan 
1328f9803b5SOwen Pan   return I == EOP;
1338f9803b5SOwen Pan }
1348f9803b5SOwen Pan 
1358f9803b5SOwen Pan } // namespace format
1368f9803b5SOwen Pan } // namespace clang
137