xref: /freebsd-src/contrib/llvm-project/libcxx/src/filesystem/path_parser.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric 
906c3fb27SDimitry Andric #ifndef PATH_PARSER_H
1006c3fb27SDimitry Andric #define PATH_PARSER_H
1106c3fb27SDimitry Andric 
1206c3fb27SDimitry Andric #include <__config>
1306c3fb27SDimitry Andric #include <__utility/unreachable.h>
1406c3fb27SDimitry Andric #include <cstddef>
1506c3fb27SDimitry Andric #include <filesystem>
1606c3fb27SDimitry Andric #include <utility>
1706c3fb27SDimitry Andric 
1806c3fb27SDimitry Andric #include "format_string.h"
1906c3fb27SDimitry Andric 
2006c3fb27SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
2106c3fb27SDimitry Andric 
/// \brief Return true if \p C is a directory separator.
///
/// '/' is always a separator. When _LIBCPP_WIN32API is defined, '\\' is
/// accepted as a separator as well.
inline bool isSeparator(path::value_type C) {
#if defined(_LIBCPP_WIN32API)
  return C == '/' || C == '\\';
#else
  return C == '/';
#endif
}
3106c3fb27SDimitry Andric 
/// \brief Return true if \p C is an ASCII letter ('a'-'z' or 'A'-'Z').
///
/// Used when lexing a "<letter>:" drive prefix.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLower = C >= 'a' && C <= 'z';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  return IsLower || IsUpper;
}
3306c3fb27SDimitry Andric 
3406c3fb27SDimitry Andric namespace parser {
3506c3fb27SDimitry Andric 
3606c3fb27SDimitry Andric using string_view_t    = path::__string_view;
3706c3fb27SDimitry Andric using string_view_pair = pair<string_view_t, string_view_t>;
3806c3fb27SDimitry Andric using PosPtr           = path::value_type const*;
3906c3fb27SDimitry Andric 
4006c3fb27SDimitry Andric struct PathParser {
4106c3fb27SDimitry Andric   enum ParserState : unsigned char {
4206c3fb27SDimitry Andric     // Zero is a special sentinel value used by default constructed iterators.
4306c3fb27SDimitry Andric     PS_BeforeBegin   = path::iterator::_BeforeBegin,
4406c3fb27SDimitry Andric     PS_InRootName    = path::iterator::_InRootName,
4506c3fb27SDimitry Andric     PS_InRootDir     = path::iterator::_InRootDir,
4606c3fb27SDimitry Andric     PS_InFilenames   = path::iterator::_InFilenames,
4706c3fb27SDimitry Andric     PS_InTrailingSep = path::iterator::_InTrailingSep,
4806c3fb27SDimitry Andric     PS_AtEnd         = path::iterator::_AtEnd
4906c3fb27SDimitry Andric   };
5006c3fb27SDimitry Andric 
5106c3fb27SDimitry Andric   const string_view_t Path;
5206c3fb27SDimitry Andric   string_view_t RawEntry;
53*0fca6ea1SDimitry Andric   ParserState State_;
5406c3fb27SDimitry Andric 
5506c3fb27SDimitry Andric private:
56*0fca6ea1SDimitry Andric   PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}
5706c3fb27SDimitry Andric 
5806c3fb27SDimitry Andric public:
5906c3fb27SDimitry Andric   PathParser(string_view_t P, string_view_t E, unsigned char S)
60*0fca6ea1SDimitry Andric       : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {
6106c3fb27SDimitry Andric     // S cannot be '0' or PS_BeforeBegin.
6206c3fb27SDimitry Andric   }
6306c3fb27SDimitry Andric 
6406c3fb27SDimitry Andric   static PathParser CreateBegin(string_view_t P) noexcept {
6506c3fb27SDimitry Andric     PathParser PP(P, PS_BeforeBegin);
6606c3fb27SDimitry Andric     PP.increment();
6706c3fb27SDimitry Andric     return PP;
6806c3fb27SDimitry Andric   }
6906c3fb27SDimitry Andric 
7006c3fb27SDimitry Andric   static PathParser CreateEnd(string_view_t P) noexcept {
7106c3fb27SDimitry Andric     PathParser PP(P, PS_AtEnd);
7206c3fb27SDimitry Andric     return PP;
7306c3fb27SDimitry Andric   }
7406c3fb27SDimitry Andric 
7506c3fb27SDimitry Andric   PosPtr peek() const noexcept {
7606c3fb27SDimitry Andric     auto TkEnd = getNextTokenStartPos();
7706c3fb27SDimitry Andric     auto End   = getAfterBack();
7806c3fb27SDimitry Andric     return TkEnd == End ? nullptr : TkEnd;
7906c3fb27SDimitry Andric   }
8006c3fb27SDimitry Andric 
8106c3fb27SDimitry Andric   void increment() noexcept {
8206c3fb27SDimitry Andric     const PosPtr End   = getAfterBack();
8306c3fb27SDimitry Andric     const PosPtr Start = getNextTokenStartPos();
8406c3fb27SDimitry Andric     if (Start == End)
8506c3fb27SDimitry Andric       return makeState(PS_AtEnd);
8606c3fb27SDimitry Andric 
87*0fca6ea1SDimitry Andric     switch (State_) {
8806c3fb27SDimitry Andric     case PS_BeforeBegin: {
8906c3fb27SDimitry Andric       PosPtr TkEnd = consumeRootName(Start, End);
9006c3fb27SDimitry Andric       if (TkEnd)
9106c3fb27SDimitry Andric         return makeState(PS_InRootName, Start, TkEnd);
9206c3fb27SDimitry Andric     }
9306c3fb27SDimitry Andric       _LIBCPP_FALLTHROUGH();
9406c3fb27SDimitry Andric     case PS_InRootName: {
9506c3fb27SDimitry Andric       PosPtr TkEnd = consumeAllSeparators(Start, End);
9606c3fb27SDimitry Andric       if (TkEnd)
9706c3fb27SDimitry Andric         return makeState(PS_InRootDir, Start, TkEnd);
9806c3fb27SDimitry Andric       else
9906c3fb27SDimitry Andric         return makeState(PS_InFilenames, Start, consumeName(Start, End));
10006c3fb27SDimitry Andric     }
10106c3fb27SDimitry Andric     case PS_InRootDir:
10206c3fb27SDimitry Andric       return makeState(PS_InFilenames, Start, consumeName(Start, End));
10306c3fb27SDimitry Andric 
10406c3fb27SDimitry Andric     case PS_InFilenames: {
10506c3fb27SDimitry Andric       PosPtr SepEnd = consumeAllSeparators(Start, End);
10606c3fb27SDimitry Andric       if (SepEnd != End) {
10706c3fb27SDimitry Andric         PosPtr TkEnd = consumeName(SepEnd, End);
10806c3fb27SDimitry Andric         if (TkEnd)
10906c3fb27SDimitry Andric           return makeState(PS_InFilenames, SepEnd, TkEnd);
11006c3fb27SDimitry Andric       }
11106c3fb27SDimitry Andric       return makeState(PS_InTrailingSep, Start, SepEnd);
11206c3fb27SDimitry Andric     }
11306c3fb27SDimitry Andric 
11406c3fb27SDimitry Andric     case PS_InTrailingSep:
11506c3fb27SDimitry Andric       return makeState(PS_AtEnd);
11606c3fb27SDimitry Andric 
11706c3fb27SDimitry Andric     case PS_AtEnd:
11806c3fb27SDimitry Andric       __libcpp_unreachable();
11906c3fb27SDimitry Andric     }
12006c3fb27SDimitry Andric   }
12106c3fb27SDimitry Andric 
12206c3fb27SDimitry Andric   void decrement() noexcept {
12306c3fb27SDimitry Andric     const PosPtr REnd   = getBeforeFront();
12406c3fb27SDimitry Andric     const PosPtr RStart = getCurrentTokenStartPos() - 1;
12506c3fb27SDimitry Andric     if (RStart == REnd) // we're decrementing the begin
12606c3fb27SDimitry Andric       return makeState(PS_BeforeBegin);
12706c3fb27SDimitry Andric 
128*0fca6ea1SDimitry Andric     switch (State_) {
12906c3fb27SDimitry Andric     case PS_AtEnd: {
13006c3fb27SDimitry Andric       // Try to consume a trailing separator or root directory first.
13106c3fb27SDimitry Andric       if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
13206c3fb27SDimitry Andric         if (SepEnd == REnd)
13306c3fb27SDimitry Andric           return makeState(PS_InRootDir, Path.data(), RStart + 1);
13406c3fb27SDimitry Andric         PosPtr TkStart = consumeRootName(SepEnd, REnd);
13506c3fb27SDimitry Andric         if (TkStart == REnd)
13606c3fb27SDimitry Andric           return makeState(PS_InRootDir, RStart, RStart + 1);
13706c3fb27SDimitry Andric         return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
13806c3fb27SDimitry Andric       } else {
13906c3fb27SDimitry Andric         PosPtr TkStart = consumeRootName(RStart, REnd);
14006c3fb27SDimitry Andric         if (TkStart == REnd)
14106c3fb27SDimitry Andric           return makeState(PS_InRootName, TkStart + 1, RStart + 1);
14206c3fb27SDimitry Andric         TkStart = consumeName(RStart, REnd);
14306c3fb27SDimitry Andric         return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
14406c3fb27SDimitry Andric       }
14506c3fb27SDimitry Andric     }
14606c3fb27SDimitry Andric     case PS_InTrailingSep:
147cb14a3feSDimitry Andric       return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1);
14806c3fb27SDimitry Andric     case PS_InFilenames: {
14906c3fb27SDimitry Andric       PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
15006c3fb27SDimitry Andric       if (SepEnd == REnd)
15106c3fb27SDimitry Andric         return makeState(PS_InRootDir, Path.data(), RStart + 1);
15206c3fb27SDimitry Andric       PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
15306c3fb27SDimitry Andric       if (TkStart == REnd) {
15406c3fb27SDimitry Andric         if (SepEnd)
15506c3fb27SDimitry Andric           return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
15606c3fb27SDimitry Andric         return makeState(PS_InRootName, TkStart + 1, RStart + 1);
15706c3fb27SDimitry Andric       }
15806c3fb27SDimitry Andric       TkStart = consumeName(SepEnd, REnd);
15906c3fb27SDimitry Andric       return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
16006c3fb27SDimitry Andric     }
16106c3fb27SDimitry Andric     case PS_InRootDir:
16206c3fb27SDimitry Andric       return makeState(PS_InRootName, Path.data(), RStart + 1);
16306c3fb27SDimitry Andric     case PS_InRootName:
16406c3fb27SDimitry Andric     case PS_BeforeBegin:
16506c3fb27SDimitry Andric       __libcpp_unreachable();
16606c3fb27SDimitry Andric     }
16706c3fb27SDimitry Andric   }
16806c3fb27SDimitry Andric 
16906c3fb27SDimitry Andric   /// \brief Return a view with the "preferred representation" of the current
17006c3fb27SDimitry Andric   ///   element. For example trailing separators are represented as a '.'
17106c3fb27SDimitry Andric   string_view_t operator*() const noexcept {
172*0fca6ea1SDimitry Andric     switch (State_) {
17306c3fb27SDimitry Andric     case PS_BeforeBegin:
17406c3fb27SDimitry Andric     case PS_AtEnd:
17506c3fb27SDimitry Andric       return PATHSTR("");
17606c3fb27SDimitry Andric     case PS_InRootDir:
17706c3fb27SDimitry Andric       if (RawEntry[0] == '\\')
17806c3fb27SDimitry Andric         return PATHSTR("\\");
17906c3fb27SDimitry Andric       else
18006c3fb27SDimitry Andric         return PATHSTR("/");
18106c3fb27SDimitry Andric     case PS_InTrailingSep:
18206c3fb27SDimitry Andric       return PATHSTR("");
18306c3fb27SDimitry Andric     case PS_InRootName:
18406c3fb27SDimitry Andric     case PS_InFilenames:
18506c3fb27SDimitry Andric       return RawEntry;
18606c3fb27SDimitry Andric     }
18706c3fb27SDimitry Andric     __libcpp_unreachable();
18806c3fb27SDimitry Andric   }
18906c3fb27SDimitry Andric 
190*0fca6ea1SDimitry Andric   explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }
19106c3fb27SDimitry Andric 
19206c3fb27SDimitry Andric   PathParser& operator++() noexcept {
19306c3fb27SDimitry Andric     increment();
19406c3fb27SDimitry Andric     return *this;
19506c3fb27SDimitry Andric   }
19606c3fb27SDimitry Andric 
19706c3fb27SDimitry Andric   PathParser& operator--() noexcept {
19806c3fb27SDimitry Andric     decrement();
19906c3fb27SDimitry Andric     return *this;
20006c3fb27SDimitry Andric   }
20106c3fb27SDimitry Andric 
202*0fca6ea1SDimitry Andric   bool atEnd() const noexcept { return State_ == PS_AtEnd; }
20306c3fb27SDimitry Andric 
204*0fca6ea1SDimitry Andric   bool inRootDir() const noexcept { return State_ == PS_InRootDir; }
20506c3fb27SDimitry Andric 
206*0fca6ea1SDimitry Andric   bool inRootName() const noexcept { return State_ == PS_InRootName; }
20706c3fb27SDimitry Andric 
208cb14a3feSDimitry Andric   bool inRootPath() const noexcept { return inRootName() || inRootDir(); }
20906c3fb27SDimitry Andric 
21006c3fb27SDimitry Andric private:
21106c3fb27SDimitry Andric   void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
212*0fca6ea1SDimitry Andric     State_    = NewState;
21306c3fb27SDimitry Andric     RawEntry = string_view_t(Start, End - Start);
21406c3fb27SDimitry Andric   }
21506c3fb27SDimitry Andric   void makeState(ParserState NewState) noexcept {
216*0fca6ea1SDimitry Andric     State_    = NewState;
21706c3fb27SDimitry Andric     RawEntry = {};
21806c3fb27SDimitry Andric   }
21906c3fb27SDimitry Andric 
22006c3fb27SDimitry Andric   PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
22106c3fb27SDimitry Andric 
22206c3fb27SDimitry Andric   PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
22306c3fb27SDimitry Andric 
22406c3fb27SDimitry Andric   /// \brief Return a pointer to the first character after the currently
22506c3fb27SDimitry Andric   ///   lexed element.
22606c3fb27SDimitry Andric   PosPtr getNextTokenStartPos() const noexcept {
227*0fca6ea1SDimitry Andric     switch (State_) {
22806c3fb27SDimitry Andric     case PS_BeforeBegin:
22906c3fb27SDimitry Andric       return Path.data();
23006c3fb27SDimitry Andric     case PS_InRootName:
23106c3fb27SDimitry Andric     case PS_InRootDir:
23206c3fb27SDimitry Andric     case PS_InFilenames:
23306c3fb27SDimitry Andric       return &RawEntry.back() + 1;
23406c3fb27SDimitry Andric     case PS_InTrailingSep:
23506c3fb27SDimitry Andric     case PS_AtEnd:
23606c3fb27SDimitry Andric       return getAfterBack();
23706c3fb27SDimitry Andric     }
23806c3fb27SDimitry Andric     __libcpp_unreachable();
23906c3fb27SDimitry Andric   }
24006c3fb27SDimitry Andric 
24106c3fb27SDimitry Andric   /// \brief Return a pointer to the first character in the currently lexed
24206c3fb27SDimitry Andric   ///   element.
24306c3fb27SDimitry Andric   PosPtr getCurrentTokenStartPos() const noexcept {
244*0fca6ea1SDimitry Andric     switch (State_) {
24506c3fb27SDimitry Andric     case PS_BeforeBegin:
24606c3fb27SDimitry Andric     case PS_InRootName:
24706c3fb27SDimitry Andric       return &Path.front();
24806c3fb27SDimitry Andric     case PS_InRootDir:
24906c3fb27SDimitry Andric     case PS_InFilenames:
25006c3fb27SDimitry Andric     case PS_InTrailingSep:
25106c3fb27SDimitry Andric       return &RawEntry.front();
25206c3fb27SDimitry Andric     case PS_AtEnd:
25306c3fb27SDimitry Andric       return &Path.back() + 1;
25406c3fb27SDimitry Andric     }
25506c3fb27SDimitry Andric     __libcpp_unreachable();
25606c3fb27SDimitry Andric   }
25706c3fb27SDimitry Andric 
25806c3fb27SDimitry Andric   // Consume all consecutive separators.
25906c3fb27SDimitry Andric   PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
26006c3fb27SDimitry Andric     if (P == nullptr || P == End || !isSeparator(*P))
26106c3fb27SDimitry Andric       return nullptr;
26206c3fb27SDimitry Andric     const int Inc = P < End ? 1 : -1;
26306c3fb27SDimitry Andric     P += Inc;
26406c3fb27SDimitry Andric     while (P != End && isSeparator(*P))
26506c3fb27SDimitry Andric       P += Inc;
26606c3fb27SDimitry Andric     return P;
26706c3fb27SDimitry Andric   }
26806c3fb27SDimitry Andric 
26906c3fb27SDimitry Andric   // Consume exactly N separators, or return nullptr.
27006c3fb27SDimitry Andric   PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
27106c3fb27SDimitry Andric     PosPtr Ret = consumeAllSeparators(P, End);
27206c3fb27SDimitry Andric     if (Ret == nullptr)
27306c3fb27SDimitry Andric       return nullptr;
27406c3fb27SDimitry Andric     if (P < End) {
27506c3fb27SDimitry Andric       if (Ret == P + N)
27606c3fb27SDimitry Andric         return Ret;
27706c3fb27SDimitry Andric     } else {
27806c3fb27SDimitry Andric       if (Ret == P - N)
27906c3fb27SDimitry Andric         return Ret;
28006c3fb27SDimitry Andric     }
28106c3fb27SDimitry Andric     return nullptr;
28206c3fb27SDimitry Andric   }
28306c3fb27SDimitry Andric 
28406c3fb27SDimitry Andric   PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
28506c3fb27SDimitry Andric     PosPtr Start = P;
28606c3fb27SDimitry Andric     if (P == nullptr || P == End || isSeparator(*P))
28706c3fb27SDimitry Andric       return nullptr;
28806c3fb27SDimitry Andric     const int Inc = P < End ? 1 : -1;
28906c3fb27SDimitry Andric     P += Inc;
29006c3fb27SDimitry Andric     while (P != End && !isSeparator(*P))
29106c3fb27SDimitry Andric       P += Inc;
29206c3fb27SDimitry Andric     if (P == End && Inc < 0) {
29306c3fb27SDimitry Andric       // Iterating backwards and consumed all the rest of the input.
29406c3fb27SDimitry Andric       // Check if the start of the string would have been considered
29506c3fb27SDimitry Andric       // a root name.
29606c3fb27SDimitry Andric       PosPtr RootEnd = consumeRootName(End + 1, Start);
29706c3fb27SDimitry Andric       if (RootEnd)
29806c3fb27SDimitry Andric         return RootEnd - 1;
29906c3fb27SDimitry Andric     }
30006c3fb27SDimitry Andric     return P;
30106c3fb27SDimitry Andric   }
30206c3fb27SDimitry Andric 
30306c3fb27SDimitry Andric   PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
30406c3fb27SDimitry Andric     if (P == End)
30506c3fb27SDimitry Andric       return nullptr;
30606c3fb27SDimitry Andric     if (P < End) {
30706c3fb27SDimitry Andric       if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
30806c3fb27SDimitry Andric         return nullptr;
30906c3fb27SDimitry Andric       return P + 2;
31006c3fb27SDimitry Andric     } else {
31106c3fb27SDimitry Andric       if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
31206c3fb27SDimitry Andric         return nullptr;
31306c3fb27SDimitry Andric       return P - 2;
31406c3fb27SDimitry Andric     }
31506c3fb27SDimitry Andric   }
31606c3fb27SDimitry Andric 
31706c3fb27SDimitry Andric   PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
31806c3fb27SDimitry Andric     if (P == End)
31906c3fb27SDimitry Andric       return nullptr;
32006c3fb27SDimitry Andric     if (P < End)
32106c3fb27SDimitry Andric       return consumeName(consumeNSeparators(P, End, 2), End);
32206c3fb27SDimitry Andric     else
32306c3fb27SDimitry Andric       return consumeNSeparators(consumeName(P, End), End, 2);
32406c3fb27SDimitry Andric   }
32506c3fb27SDimitry Andric 
32606c3fb27SDimitry Andric   PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
32706c3fb27SDimitry Andric #if defined(_LIBCPP_WIN32API)
32806c3fb27SDimitry Andric     if (PosPtr Ret = consumeDriveLetter(P, End))
32906c3fb27SDimitry Andric       return Ret;
33006c3fb27SDimitry Andric     if (PosPtr Ret = consumeNetworkRoot(P, End))
33106c3fb27SDimitry Andric       return Ret;
33206c3fb27SDimitry Andric #endif
33306c3fb27SDimitry Andric     return nullptr;
33406c3fb27SDimitry Andric   }
33506c3fb27SDimitry Andric };
33606c3fb27SDimitry Andric 
33706c3fb27SDimitry Andric inline string_view_pair separate_filename(string_view_t const& s) {
33806c3fb27SDimitry Andric   if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())
33906c3fb27SDimitry Andric     return string_view_pair{s, PATHSTR("")};
34006c3fb27SDimitry Andric   auto pos = s.find_last_of('.');
34106c3fb27SDimitry Andric   if (pos == string_view_t::npos || pos == 0)
34206c3fb27SDimitry Andric     return string_view_pair{s, string_view_t{}};
34306c3fb27SDimitry Andric   return string_view_pair{s.substr(0, pos), s.substr(pos)};
34406c3fb27SDimitry Andric }
34506c3fb27SDimitry Andric 
346cb14a3feSDimitry Andric inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; }
34706c3fb27SDimitry Andric 
34806c3fb27SDimitry Andric } // namespace parser
34906c3fb27SDimitry Andric 
35006c3fb27SDimitry Andric _LIBCPP_END_NAMESPACE_FILESYSTEM
35106c3fb27SDimitry Andric 
35206c3fb27SDimitry Andric #endif // PATH_PARSER_H
353