106c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric 906c3fb27SDimitry Andric #ifndef PATH_PARSER_H 1006c3fb27SDimitry Andric #define PATH_PARSER_H 1106c3fb27SDimitry Andric 1206c3fb27SDimitry Andric #include <__config> 1306c3fb27SDimitry Andric #include <__utility/unreachable.h> 1406c3fb27SDimitry Andric #include <cstddef> 1506c3fb27SDimitry Andric #include <filesystem> 1606c3fb27SDimitry Andric #include <utility> 1706c3fb27SDimitry Andric 1806c3fb27SDimitry Andric #include "format_string.h" 1906c3fb27SDimitry Andric 2006c3fb27SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM 2106c3fb27SDimitry Andric 2206c3fb27SDimitry Andric inline bool isSeparator(path::value_type C) { 2306c3fb27SDimitry Andric if (C == '/') 2406c3fb27SDimitry Andric return true; 2506c3fb27SDimitry Andric #if defined(_LIBCPP_WIN32API) 2606c3fb27SDimitry Andric if (C == '\\') 2706c3fb27SDimitry Andric return true; 2806c3fb27SDimitry Andric #endif 2906c3fb27SDimitry Andric return false; 3006c3fb27SDimitry Andric } 3106c3fb27SDimitry Andric 32cb14a3feSDimitry Andric inline bool isDriveLetter(path::value_type C) { return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z'); } 3306c3fb27SDimitry Andric 3406c3fb27SDimitry Andric namespace parser { 3506c3fb27SDimitry Andric 3606c3fb27SDimitry Andric using string_view_t = path::__string_view; 3706c3fb27SDimitry Andric using string_view_pair = pair<string_view_t, string_view_t>; 3806c3fb27SDimitry Andric using PosPtr = path::value_type const*; 3906c3fb27SDimitry Andric 4006c3fb27SDimitry Andric struct PathParser { 4106c3fb27SDimitry Andric enum ParserState : unsigned char { 4206c3fb27SDimitry Andric // Zero is a special sentinel value used by default constructed iterators. 4306c3fb27SDimitry Andric PS_BeforeBegin = path::iterator::_BeforeBegin, 4406c3fb27SDimitry Andric PS_InRootName = path::iterator::_InRootName, 4506c3fb27SDimitry Andric PS_InRootDir = path::iterator::_InRootDir, 4606c3fb27SDimitry Andric PS_InFilenames = path::iterator::_InFilenames, 4706c3fb27SDimitry Andric PS_InTrailingSep = path::iterator::_InTrailingSep, 4806c3fb27SDimitry Andric PS_AtEnd = path::iterator::_AtEnd 4906c3fb27SDimitry Andric }; 5006c3fb27SDimitry Andric 5106c3fb27SDimitry Andric const string_view_t Path; 5206c3fb27SDimitry Andric string_view_t RawEntry; 53*0fca6ea1SDimitry Andric ParserState State_; 5406c3fb27SDimitry Andric 5506c3fb27SDimitry Andric private: 56*0fca6ea1SDimitry Andric PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {} 5706c3fb27SDimitry Andric 5806c3fb27SDimitry Andric public: 5906c3fb27SDimitry Andric PathParser(string_view_t P, string_view_t E, unsigned char S) 60*0fca6ea1SDimitry Andric : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) { 6106c3fb27SDimitry Andric // S cannot be '0' or PS_BeforeBegin. 6206c3fb27SDimitry Andric } 6306c3fb27SDimitry Andric 6406c3fb27SDimitry Andric static PathParser CreateBegin(string_view_t P) noexcept { 6506c3fb27SDimitry Andric PathParser PP(P, PS_BeforeBegin); 6606c3fb27SDimitry Andric PP.increment(); 6706c3fb27SDimitry Andric return PP; 6806c3fb27SDimitry Andric } 6906c3fb27SDimitry Andric 7006c3fb27SDimitry Andric static PathParser CreateEnd(string_view_t P) noexcept { 7106c3fb27SDimitry Andric PathParser PP(P, PS_AtEnd); 7206c3fb27SDimitry Andric return PP; 7306c3fb27SDimitry Andric } 7406c3fb27SDimitry Andric 7506c3fb27SDimitry Andric PosPtr peek() const noexcept { 7606c3fb27SDimitry Andric auto TkEnd = getNextTokenStartPos(); 7706c3fb27SDimitry Andric auto End = getAfterBack(); 7806c3fb27SDimitry Andric return TkEnd == End ? nullptr : TkEnd; 7906c3fb27SDimitry Andric } 8006c3fb27SDimitry Andric 8106c3fb27SDimitry Andric void increment() noexcept { 8206c3fb27SDimitry Andric const PosPtr End = getAfterBack(); 8306c3fb27SDimitry Andric const PosPtr Start = getNextTokenStartPos(); 8406c3fb27SDimitry Andric if (Start == End) 8506c3fb27SDimitry Andric return makeState(PS_AtEnd); 8606c3fb27SDimitry Andric 87*0fca6ea1SDimitry Andric switch (State_) { 8806c3fb27SDimitry Andric case PS_BeforeBegin: { 8906c3fb27SDimitry Andric PosPtr TkEnd = consumeRootName(Start, End); 9006c3fb27SDimitry Andric if (TkEnd) 9106c3fb27SDimitry Andric return makeState(PS_InRootName, Start, TkEnd); 9206c3fb27SDimitry Andric } 9306c3fb27SDimitry Andric _LIBCPP_FALLTHROUGH(); 9406c3fb27SDimitry Andric case PS_InRootName: { 9506c3fb27SDimitry Andric PosPtr TkEnd = consumeAllSeparators(Start, End); 9606c3fb27SDimitry Andric if (TkEnd) 9706c3fb27SDimitry Andric return makeState(PS_InRootDir, Start, TkEnd); 9806c3fb27SDimitry Andric else 9906c3fb27SDimitry Andric return makeState(PS_InFilenames, Start, consumeName(Start, End)); 10006c3fb27SDimitry Andric } 10106c3fb27SDimitry Andric case PS_InRootDir: 10206c3fb27SDimitry Andric return makeState(PS_InFilenames, Start, consumeName(Start, End)); 10306c3fb27SDimitry Andric 10406c3fb27SDimitry Andric case PS_InFilenames: { 10506c3fb27SDimitry Andric PosPtr SepEnd = consumeAllSeparators(Start, End); 10606c3fb27SDimitry Andric if (SepEnd != End) { 10706c3fb27SDimitry Andric PosPtr TkEnd = consumeName(SepEnd, End); 10806c3fb27SDimitry Andric if (TkEnd) 10906c3fb27SDimitry Andric return makeState(PS_InFilenames, SepEnd, TkEnd); 11006c3fb27SDimitry Andric } 11106c3fb27SDimitry Andric return makeState(PS_InTrailingSep, Start, SepEnd); 11206c3fb27SDimitry Andric } 11306c3fb27SDimitry Andric 11406c3fb27SDimitry Andric case PS_InTrailingSep: 11506c3fb27SDimitry Andric return makeState(PS_AtEnd); 11606c3fb27SDimitry Andric 11706c3fb27SDimitry Andric case PS_AtEnd: 11806c3fb27SDimitry Andric __libcpp_unreachable(); 11906c3fb27SDimitry Andric } 12006c3fb27SDimitry Andric } 12106c3fb27SDimitry Andric 12206c3fb27SDimitry Andric void decrement() noexcept { 12306c3fb27SDimitry Andric const PosPtr REnd = getBeforeFront(); 12406c3fb27SDimitry Andric const PosPtr RStart = getCurrentTokenStartPos() - 1; 12506c3fb27SDimitry Andric if (RStart == REnd) // we're decrementing the begin 12606c3fb27SDimitry Andric return makeState(PS_BeforeBegin); 12706c3fb27SDimitry Andric 128*0fca6ea1SDimitry Andric switch (State_) { 12906c3fb27SDimitry Andric case PS_AtEnd: { 13006c3fb27SDimitry Andric // Try to consume a trailing separator or root directory first. 13106c3fb27SDimitry Andric if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) { 13206c3fb27SDimitry Andric if (SepEnd == REnd) 13306c3fb27SDimitry Andric return makeState(PS_InRootDir, Path.data(), RStart + 1); 13406c3fb27SDimitry Andric PosPtr TkStart = consumeRootName(SepEnd, REnd); 13506c3fb27SDimitry Andric if (TkStart == REnd) 13606c3fb27SDimitry Andric return makeState(PS_InRootDir, RStart, RStart + 1); 13706c3fb27SDimitry Andric return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1); 13806c3fb27SDimitry Andric } else { 13906c3fb27SDimitry Andric PosPtr TkStart = consumeRootName(RStart, REnd); 14006c3fb27SDimitry Andric if (TkStart == REnd) 14106c3fb27SDimitry Andric return makeState(PS_InRootName, TkStart + 1, RStart + 1); 14206c3fb27SDimitry Andric TkStart = consumeName(RStart, REnd); 14306c3fb27SDimitry Andric return makeState(PS_InFilenames, TkStart + 1, RStart + 1); 14406c3fb27SDimitry Andric } 14506c3fb27SDimitry Andric } 14606c3fb27SDimitry Andric case PS_InTrailingSep: 147cb14a3feSDimitry Andric return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1); 14806c3fb27SDimitry Andric case PS_InFilenames: { 14906c3fb27SDimitry Andric PosPtr SepEnd = consumeAllSeparators(RStart, REnd); 15006c3fb27SDimitry Andric if (SepEnd == REnd) 15106c3fb27SDimitry Andric return makeState(PS_InRootDir, Path.data(), RStart + 1); 15206c3fb27SDimitry Andric PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd); 15306c3fb27SDimitry Andric if (TkStart == REnd) { 15406c3fb27SDimitry Andric if (SepEnd) 15506c3fb27SDimitry Andric return makeState(PS_InRootDir, SepEnd + 1, RStart + 1); 15606c3fb27SDimitry Andric return makeState(PS_InRootName, TkStart + 1, RStart + 1); 15706c3fb27SDimitry Andric } 15806c3fb27SDimitry Andric TkStart = consumeName(SepEnd, REnd); 15906c3fb27SDimitry Andric return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1); 16006c3fb27SDimitry Andric } 16106c3fb27SDimitry Andric case PS_InRootDir: 16206c3fb27SDimitry Andric return makeState(PS_InRootName, Path.data(), RStart + 1); 16306c3fb27SDimitry Andric case PS_InRootName: 16406c3fb27SDimitry Andric case PS_BeforeBegin: 16506c3fb27SDimitry Andric __libcpp_unreachable(); 16606c3fb27SDimitry Andric } 16706c3fb27SDimitry Andric } 16806c3fb27SDimitry Andric 16906c3fb27SDimitry Andric /// \brief Return a view with the "preferred representation" of the current 17006c3fb27SDimitry Andric /// element. For example trailing separators are represented as a '.' 17106c3fb27SDimitry Andric string_view_t operator*() const noexcept { 172*0fca6ea1SDimitry Andric switch (State_) { 17306c3fb27SDimitry Andric case PS_BeforeBegin: 17406c3fb27SDimitry Andric case PS_AtEnd: 17506c3fb27SDimitry Andric return PATHSTR(""); 17606c3fb27SDimitry Andric case PS_InRootDir: 17706c3fb27SDimitry Andric if (RawEntry[0] == '\\') 17806c3fb27SDimitry Andric return PATHSTR("\\"); 17906c3fb27SDimitry Andric else 18006c3fb27SDimitry Andric return PATHSTR("/"); 18106c3fb27SDimitry Andric case PS_InTrailingSep: 18206c3fb27SDimitry Andric return PATHSTR(""); 18306c3fb27SDimitry Andric case PS_InRootName: 18406c3fb27SDimitry Andric case PS_InFilenames: 18506c3fb27SDimitry Andric return RawEntry; 18606c3fb27SDimitry Andric } 18706c3fb27SDimitry Andric __libcpp_unreachable(); 18806c3fb27SDimitry Andric } 18906c3fb27SDimitry Andric 190*0fca6ea1SDimitry Andric explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; } 19106c3fb27SDimitry Andric 19206c3fb27SDimitry Andric PathParser& operator++() noexcept { 19306c3fb27SDimitry Andric increment(); 19406c3fb27SDimitry Andric return *this; 19506c3fb27SDimitry Andric } 19606c3fb27SDimitry Andric 19706c3fb27SDimitry Andric PathParser& operator--() noexcept { 19806c3fb27SDimitry Andric decrement(); 19906c3fb27SDimitry Andric return *this; 20006c3fb27SDimitry Andric } 20106c3fb27SDimitry Andric 202*0fca6ea1SDimitry Andric bool atEnd() const noexcept { return State_ == PS_AtEnd; } 20306c3fb27SDimitry Andric 204*0fca6ea1SDimitry Andric bool inRootDir() const noexcept { return State_ == PS_InRootDir; } 20506c3fb27SDimitry Andric 206*0fca6ea1SDimitry Andric bool inRootName() const noexcept { return State_ == PS_InRootName; } 20706c3fb27SDimitry Andric 208cb14a3feSDimitry Andric bool inRootPath() const noexcept { return inRootName() || inRootDir(); } 20906c3fb27SDimitry Andric 21006c3fb27SDimitry Andric private: 21106c3fb27SDimitry Andric void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept { 212*0fca6ea1SDimitry Andric State_ = NewState; 21306c3fb27SDimitry Andric RawEntry = string_view_t(Start, End - Start); 21406c3fb27SDimitry Andric } 21506c3fb27SDimitry Andric void makeState(ParserState NewState) noexcept { 216*0fca6ea1SDimitry Andric State_ = NewState; 21706c3fb27SDimitry Andric RawEntry = {}; 21806c3fb27SDimitry Andric } 21906c3fb27SDimitry Andric 22006c3fb27SDimitry Andric PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); } 22106c3fb27SDimitry Andric 22206c3fb27SDimitry Andric PosPtr getBeforeFront() const noexcept { return Path.data() - 1; } 22306c3fb27SDimitry Andric 22406c3fb27SDimitry Andric /// \brief Return a pointer to the first character after the currently 22506c3fb27SDimitry Andric /// lexed element. 22606c3fb27SDimitry Andric PosPtr getNextTokenStartPos() const noexcept { 227*0fca6ea1SDimitry Andric switch (State_) { 22806c3fb27SDimitry Andric case PS_BeforeBegin: 22906c3fb27SDimitry Andric return Path.data(); 23006c3fb27SDimitry Andric case PS_InRootName: 23106c3fb27SDimitry Andric case PS_InRootDir: 23206c3fb27SDimitry Andric case PS_InFilenames: 23306c3fb27SDimitry Andric return &RawEntry.back() + 1; 23406c3fb27SDimitry Andric case PS_InTrailingSep: 23506c3fb27SDimitry Andric case PS_AtEnd: 23606c3fb27SDimitry Andric return getAfterBack(); 23706c3fb27SDimitry Andric } 23806c3fb27SDimitry Andric __libcpp_unreachable(); 23906c3fb27SDimitry Andric } 24006c3fb27SDimitry Andric 24106c3fb27SDimitry Andric /// \brief Return a pointer to the first character in the currently lexed 24206c3fb27SDimitry Andric /// element. 24306c3fb27SDimitry Andric PosPtr getCurrentTokenStartPos() const noexcept { 244*0fca6ea1SDimitry Andric switch (State_) { 24506c3fb27SDimitry Andric case PS_BeforeBegin: 24606c3fb27SDimitry Andric case PS_InRootName: 24706c3fb27SDimitry Andric return &Path.front(); 24806c3fb27SDimitry Andric case PS_InRootDir: 24906c3fb27SDimitry Andric case PS_InFilenames: 25006c3fb27SDimitry Andric case PS_InTrailingSep: 25106c3fb27SDimitry Andric return &RawEntry.front(); 25206c3fb27SDimitry Andric case PS_AtEnd: 25306c3fb27SDimitry Andric return &Path.back() + 1; 25406c3fb27SDimitry Andric } 25506c3fb27SDimitry Andric __libcpp_unreachable(); 25606c3fb27SDimitry Andric } 25706c3fb27SDimitry Andric 25806c3fb27SDimitry Andric // Consume all consecutive separators. 25906c3fb27SDimitry Andric PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept { 26006c3fb27SDimitry Andric if (P == nullptr || P == End || !isSeparator(*P)) 26106c3fb27SDimitry Andric return nullptr; 26206c3fb27SDimitry Andric const int Inc = P < End ? 1 : -1; 26306c3fb27SDimitry Andric P += Inc; 26406c3fb27SDimitry Andric while (P != End && isSeparator(*P)) 26506c3fb27SDimitry Andric P += Inc; 26606c3fb27SDimitry Andric return P; 26706c3fb27SDimitry Andric } 26806c3fb27SDimitry Andric 26906c3fb27SDimitry Andric // Consume exactly N separators, or return nullptr. 27006c3fb27SDimitry Andric PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept { 27106c3fb27SDimitry Andric PosPtr Ret = consumeAllSeparators(P, End); 27206c3fb27SDimitry Andric if (Ret == nullptr) 27306c3fb27SDimitry Andric return nullptr; 27406c3fb27SDimitry Andric if (P < End) { 27506c3fb27SDimitry Andric if (Ret == P + N) 27606c3fb27SDimitry Andric return Ret; 27706c3fb27SDimitry Andric } else { 27806c3fb27SDimitry Andric if (Ret == P - N) 27906c3fb27SDimitry Andric return Ret; 28006c3fb27SDimitry Andric } 28106c3fb27SDimitry Andric return nullptr; 28206c3fb27SDimitry Andric } 28306c3fb27SDimitry Andric 28406c3fb27SDimitry Andric PosPtr consumeName(PosPtr P, PosPtr End) const noexcept { 28506c3fb27SDimitry Andric PosPtr Start = P; 28606c3fb27SDimitry Andric if (P == nullptr || P == End || isSeparator(*P)) 28706c3fb27SDimitry Andric return nullptr; 28806c3fb27SDimitry Andric const int Inc = P < End ? 1 : -1; 28906c3fb27SDimitry Andric P += Inc; 29006c3fb27SDimitry Andric while (P != End && !isSeparator(*P)) 29106c3fb27SDimitry Andric P += Inc; 29206c3fb27SDimitry Andric if (P == End && Inc < 0) { 29306c3fb27SDimitry Andric // Iterating backwards and consumed all the rest of the input. 29406c3fb27SDimitry Andric // Check if the start of the string would have been considered 29506c3fb27SDimitry Andric // a root name. 29606c3fb27SDimitry Andric PosPtr RootEnd = consumeRootName(End + 1, Start); 29706c3fb27SDimitry Andric if (RootEnd) 29806c3fb27SDimitry Andric return RootEnd - 1; 29906c3fb27SDimitry Andric } 30006c3fb27SDimitry Andric return P; 30106c3fb27SDimitry Andric } 30206c3fb27SDimitry Andric 30306c3fb27SDimitry Andric PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept { 30406c3fb27SDimitry Andric if (P == End) 30506c3fb27SDimitry Andric return nullptr; 30606c3fb27SDimitry Andric if (P < End) { 30706c3fb27SDimitry Andric if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':') 30806c3fb27SDimitry Andric return nullptr; 30906c3fb27SDimitry Andric return P + 2; 31006c3fb27SDimitry Andric } else { 31106c3fb27SDimitry Andric if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':') 31206c3fb27SDimitry Andric return nullptr; 31306c3fb27SDimitry Andric return P - 2; 31406c3fb27SDimitry Andric } 31506c3fb27SDimitry Andric } 31606c3fb27SDimitry Andric 31706c3fb27SDimitry Andric PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept { 31806c3fb27SDimitry Andric if (P == End) 31906c3fb27SDimitry Andric return nullptr; 32006c3fb27SDimitry Andric if (P < End) 32106c3fb27SDimitry Andric return consumeName(consumeNSeparators(P, End, 2), End); 32206c3fb27SDimitry Andric else 32306c3fb27SDimitry Andric return consumeNSeparators(consumeName(P, End), End, 2); 32406c3fb27SDimitry Andric } 32506c3fb27SDimitry Andric 32606c3fb27SDimitry Andric PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept { 32706c3fb27SDimitry Andric #if defined(_LIBCPP_WIN32API) 32806c3fb27SDimitry Andric if (PosPtr Ret = consumeDriveLetter(P, End)) 32906c3fb27SDimitry Andric return Ret; 33006c3fb27SDimitry Andric if (PosPtr Ret = consumeNetworkRoot(P, End)) 33106c3fb27SDimitry Andric return Ret; 33206c3fb27SDimitry Andric #endif 33306c3fb27SDimitry Andric return nullptr; 33406c3fb27SDimitry Andric } 33506c3fb27SDimitry Andric }; 33606c3fb27SDimitry Andric 33706c3fb27SDimitry Andric inline string_view_pair separate_filename(string_view_t const& s) { 33806c3fb27SDimitry Andric if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty()) 33906c3fb27SDimitry Andric return string_view_pair{s, PATHSTR("")}; 34006c3fb27SDimitry Andric auto pos = s.find_last_of('.'); 34106c3fb27SDimitry Andric if (pos == string_view_t::npos || pos == 0) 34206c3fb27SDimitry Andric return string_view_pair{s, string_view_t{}}; 34306c3fb27SDimitry Andric return string_view_pair{s.substr(0, pos), s.substr(pos)}; 34406c3fb27SDimitry Andric } 34506c3fb27SDimitry Andric 346cb14a3feSDimitry Andric inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; } 34706c3fb27SDimitry Andric 34806c3fb27SDimitry Andric } // namespace parser 34906c3fb27SDimitry Andric 35006c3fb27SDimitry Andric _LIBCPP_END_NAMESPACE_FILESYSTEM 35106c3fb27SDimitry Andric 35206c3fb27SDimitry Andric #endif // PATH_PARSER_H 353