xref: /llvm-project/libcxx/src/filesystem/path.cpp (revision b69ddbc62838f23ace237c206676b1ed1c882638)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <__config>
10 #include <filesystem>
11 #include <vector>
12 
13 #include "error.h"
14 #include "path_parser.h"
15 
16 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
17 
18 using detail::ErrorHandler;
19 using parser::createView;
20 using parser::PathParser;
21 using parser::string_view_t;
22 
23 ///////////////////////////////////////////////////////////////////////////////
24 //                            path definitions
25 ///////////////////////////////////////////////////////////////////////////////
26 
27 _LIBCPP_DIAGNOSTIC_PUSH
28 _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated")
29 constexpr path::value_type path::preferred_separator;
30 _LIBCPP_DIAGNOSTIC_POP
31 
32 path& path::replace_extension(path const& replacement) {
33   path p = extension();
34   if (not p.empty()) {
35     __pn_.erase(__pn_.size() - p.native().size());
36   }
37   if (!replacement.empty()) {
38     if (replacement.native()[0] != '.') {
39       __pn_ += PATHSTR(".");
40     }
41     __pn_.append(replacement.__pn_);
42   }
43   return *this;
44 }
45 
46 ///////////////////////////////////////////////////////////////////////////////
47 // path.decompose
48 
49 string_view_t path::__root_name() const {
50   auto PP = PathParser::CreateBegin(__pn_);
51   if (PP.State_ == PathParser::PS_InRootName)
52     return *PP;
53   return {};
54 }
55 
56 string_view_t path::__root_directory() const {
57   auto PP = PathParser::CreateBegin(__pn_);
58   if (PP.State_ == PathParser::PS_InRootName)
59     ++PP;
60   if (PP.State_ == PathParser::PS_InRootDir)
61     return *PP;
62   return {};
63 }
64 
65 string_view_t path::__root_path_raw() const {
66   auto PP = PathParser::CreateBegin(__pn_);
67   if (PP.State_ == PathParser::PS_InRootName) {
68     auto NextCh = PP.peek();
69     if (NextCh && isSeparator(*NextCh)) {
70       ++PP;
71       return createView(__pn_.data(), &PP.RawEntry.back());
72     }
73     return PP.RawEntry;
74   }
75   if (PP.State_ == PathParser::PS_InRootDir)
76     return *PP;
77   return {};
78 }
79 
80 static bool ConsumeRootName(PathParser* PP) {
81   static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2, "Values for enums are incorrect");
82   while (PP->State_ <= PathParser::PS_InRootName)
83     ++(*PP);
84   return PP->State_ == PathParser::PS_AtEnd;
85 }
86 
87 static bool ConsumeRootDir(PathParser* PP) {
88   static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2 && PathParser::PS_InRootDir == 3,
89                 "Values for enums are incorrect");
90   while (PP->State_ <= PathParser::PS_InRootDir)
91     ++(*PP);
92   return PP->State_ == PathParser::PS_AtEnd;
93 }
94 
95 string_view_t path::__relative_path() const {
96   auto PP = PathParser::CreateBegin(__pn_);
97   if (ConsumeRootDir(&PP))
98     return {};
99   return createView(PP.RawEntry.data(), &__pn_.back());
100 }
101 
102 string_view_t path::__parent_path() const {
103   if (empty())
104     return {};
105   // Determine if we have a root path but not a relative path. In that case
106   // return *this.
107   {
108     auto PP = PathParser::CreateBegin(__pn_);
109     if (ConsumeRootDir(&PP))
110       return __pn_;
111   }
112   // Otherwise remove a single element from the end of the path, and return
113   // a string representing that path
114   {
115     auto PP = PathParser::CreateEnd(__pn_);
116     --PP;
117     if (PP.RawEntry.data() == __pn_.data())
118       return {};
119     --PP;
120     return createView(__pn_.data(), &PP.RawEntry.back());
121   }
122 }
123 
124 string_view_t path::__filename() const {
125   if (empty())
126     return {};
127   {
128     PathParser PP = PathParser::CreateBegin(__pn_);
129     if (ConsumeRootDir(&PP))
130       return {};
131   }
132   return *(--PathParser::CreateEnd(__pn_));
133 }
134 
135 string_view_t path::__stem() const { return parser::separate_filename(__filename()).first; }
136 
137 string_view_t path::__extension() const { return parser::separate_filename(__filename()).second; }
138 
139 ////////////////////////////////////////////////////////////////////////////
140 // path.gen
141 
142 enum PathPartKind : unsigned char { PK_None, PK_RootSep, PK_Filename, PK_Dot, PK_DotDot, PK_TrailingSep };
143 
144 static PathPartKind ClassifyPathPart(string_view_t Part) {
145   if (Part.empty())
146     return PK_TrailingSep;
147   if (Part == PATHSTR("."))
148     return PK_Dot;
149   if (Part == PATHSTR(".."))
150     return PK_DotDot;
151   if (Part == PATHSTR("/"))
152     return PK_RootSep;
153 #if defined(_LIBCPP_WIN32API)
154   if (Part == PATHSTR("\\"))
155     return PK_RootSep;
156 #endif
157   return PK_Filename;
158 }
159 
160 path path::lexically_normal() const {
161   if (__pn_.empty())
162     return *this;
163 
164   using PartKindPair = pair<string_view_t, PathPartKind>;
165   vector<PartKindPair> Parts;
166   // Guess as to how many elements the path has to avoid reallocating.
167   Parts.reserve(32);
168 
169   // Track the total size of the parts as we collect them. This allows the
170   // resulting path to reserve the correct amount of memory.
171   size_t NewPathSize = 0;
172   auto AddPart       = [&](PathPartKind K, string_view_t P) {
173     NewPathSize += P.size();
174     Parts.emplace_back(P, K);
175   };
176   auto LastPartKind = [&]() {
177     if (Parts.empty())
178       return PK_None;
179     return Parts.back().second;
180   };
181 
182   bool MaybeNeedTrailingSep = false;
183   // Build a stack containing the remaining elements of the path, popping off
184   // elements which occur before a '..' entry.
185   for (auto PP = PathParser::CreateBegin(__pn_); PP; ++PP) {
186     auto Part         = *PP;
187     PathPartKind Kind = ClassifyPathPart(Part);
188     switch (Kind) {
189     case PK_Filename:
190     case PK_RootSep: {
191       // Add all non-dot and non-dot-dot elements to the stack of elements.
192       AddPart(Kind, Part);
193       MaybeNeedTrailingSep = false;
194       break;
195     }
196     case PK_DotDot: {
197       // Only push a ".." element if there are no elements preceding the "..",
198       // or if the preceding element is itself "..".
199       auto LastKind = LastPartKind();
200       if (LastKind == PK_Filename) {
201         NewPathSize -= Parts.back().first.size();
202         Parts.pop_back();
203       } else if (LastKind != PK_RootSep)
204         AddPart(PK_DotDot, PATHSTR(".."));
205       MaybeNeedTrailingSep = LastKind == PK_Filename;
206       break;
207     }
208     case PK_Dot:
209     case PK_TrailingSep: {
210       MaybeNeedTrailingSep = true;
211       break;
212     }
213     case PK_None:
214       __libcpp_unreachable();
215     }
216   }
217   // [fs.path.generic]p6.8: If the path is empty, add a dot.
218   if (Parts.empty())
219     return PATHSTR(".");
220 
221   // [fs.path.generic]p6.7: If the last filename is dot-dot, remove any
222   // trailing directory-separator.
223   bool NeedTrailingSep = MaybeNeedTrailingSep && LastPartKind() == PK_Filename;
224 
225   path Result;
226   Result.__pn_.reserve(Parts.size() + NewPathSize + NeedTrailingSep);
227   for (auto& PK : Parts)
228     Result /= PK.first;
229 
230   if (NeedTrailingSep)
231     Result /= PATHSTR("");
232 
233   Result.make_preferred();
234   return Result;
235 }
236 
237 static int DetermineLexicalElementCount(PathParser PP) {
238   int Count = 0;
239   for (; PP; ++PP) {
240     auto Elem = *PP;
241     if (Elem == PATHSTR(".."))
242       --Count;
243     else if (Elem != PATHSTR(".") && Elem != PATHSTR(""))
244       ++Count;
245   }
246   return Count;
247 }
248 
249 path path::lexically_relative(const path& base) const {
250   { // perform root-name/root-directory mismatch checks
251     auto PP                      = PathParser::CreateBegin(__pn_);
252     auto PPBase                  = PathParser::CreateBegin(base.__pn_);
253     auto CheckIterMismatchAtBase = [&]() {
254       return PP.State_ != PPBase.State_ && (PP.inRootPath() || PPBase.inRootPath());
255     };
256     if (PP.inRootName() && PPBase.inRootName()) {
257       if (*PP != *PPBase)
258         return {};
259     } else if (CheckIterMismatchAtBase())
260       return {};
261 
262     if (PP.inRootPath())
263       ++PP;
264     if (PPBase.inRootPath())
265       ++PPBase;
266     if (CheckIterMismatchAtBase())
267       return {};
268   }
269 
270   // Find the first mismatching element
271   auto PP     = PathParser::CreateBegin(__pn_);
272   auto PPBase = PathParser::CreateBegin(base.__pn_);
273   while (PP && PPBase && PP.State_ == PPBase.State_ && (*PP == *PPBase || PP.inRootDir())) {
274     ++PP;
275     ++PPBase;
276   }
277 
278   // If there is no mismatch, return ".".
279   if (!PP && !PPBase)
280     return ".";
281 
282   // Otherwise, determine the number of elements, 'n', which are not dot or
283   // dot-dot minus the number of dot-dot elements.
284   int ElemCount = DetermineLexicalElementCount(PPBase);
285   if (ElemCount < 0)
286     return {};
287 
288   // if n == 0 and (a == end() || a->empty()), returns path("."); otherwise
289   if (ElemCount == 0 && (PP.atEnd() || *PP == PATHSTR("")))
290     return PATHSTR(".");
291 
292   // return a path constructed with 'n' dot-dot elements, followed by the
293   // elements of '*this' after the mismatch.
294   path Result;
295   // FIXME: Reserve enough room in Result that it won't have to re-allocate.
296   while (ElemCount--)
297     Result /= PATHSTR("..");
298   for (; PP; ++PP)
299     Result /= *PP;
300   return Result;
301 }
302 
303 ////////////////////////////////////////////////////////////////////////////
304 // path.comparisons
305 static int CompareRootName(PathParser* LHS, PathParser* RHS) {
306   if (!LHS->inRootName() && !RHS->inRootName())
307     return 0;
308 
309   auto GetRootName = [](PathParser* Parser) -> string_view_t { return Parser->inRootName() ? **Parser : PATHSTR(""); };
310   int res          = GetRootName(LHS).compare(GetRootName(RHS));
311   ConsumeRootName(LHS);
312   ConsumeRootName(RHS);
313   return res;
314 }
315 
316 static int CompareRootDir(PathParser* LHS, PathParser* RHS) {
317   if (!LHS->inRootDir() && RHS->inRootDir())
318     return -1;
319   else if (LHS->inRootDir() && !RHS->inRootDir())
320     return 1;
321   else {
322     ConsumeRootDir(LHS);
323     ConsumeRootDir(RHS);
324     return 0;
325   }
326 }
327 
328 static int CompareRelative(PathParser* LHSPtr, PathParser* RHSPtr) {
329   auto& LHS = *LHSPtr;
330   auto& RHS = *RHSPtr;
331 
332   int res;
333   while (LHS && RHS) {
334     if ((res = (*LHS).compare(*RHS)) != 0)
335       return res;
336     ++LHS;
337     ++RHS;
338   }
339   return 0;
340 }
341 
342 static int CompareEndState(PathParser* LHS, PathParser* RHS) {
343   if (LHS->atEnd() && !RHS->atEnd())
344     return -1;
345   else if (!LHS->atEnd() && RHS->atEnd())
346     return 1;
347   return 0;
348 }
349 
350 int path::__compare(string_view_t __s) const {
351   auto LHS = PathParser::CreateBegin(__pn_);
352   auto RHS = PathParser::CreateBegin(__s);
353   int res;
354 
355   if ((res = CompareRootName(&LHS, &RHS)) != 0)
356     return res;
357 
358   if ((res = CompareRootDir(&LHS, &RHS)) != 0)
359     return res;
360 
361   if ((res = CompareRelative(&LHS, &RHS)) != 0)
362     return res;
363 
364   return CompareEndState(&LHS, &RHS);
365 }
366 
367 ////////////////////////////////////////////////////////////////////////////
368 // path.nonmembers
369 size_t hash_value(const path& __p) noexcept {
370   auto PP           = PathParser::CreateBegin(__p.native());
371   size_t hash_value = 0;
372   hash<string_view_t> hasher;
373   while (PP) {
374     string_view_t Part = PP.inRootDir() ? PATHSTR("/") : *PP;
375     hash_value         = __hash_combine(hash_value, hasher(Part));
376     ++PP;
377   }
378   return hash_value;
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////
382 // path.itr
383 path::iterator path::begin() const {
384   auto PP = PathParser::CreateBegin(__pn_);
385   iterator it;
386   it.__path_ptr_ = this;
387   it.__state_    = static_cast<path::iterator::_ParserState>(PP.State_);
388   it.__entry_    = PP.RawEntry;
389   it.__stashed_elem_.__assign_view(*PP);
390   return it;
391 }
392 
393 path::iterator path::end() const {
394   iterator it{};
395   it.__state_    = path::iterator::_AtEnd;
396   it.__path_ptr_ = this;
397   return it;
398 }
399 
400 path::iterator& path::iterator::__increment() {
401   PathParser PP(__path_ptr_->native(), __entry_, __state_);
402   ++PP;
403   __state_ = static_cast<_ParserState>(PP.State_);
404   __entry_ = PP.RawEntry;
405   __stashed_elem_.__assign_view(*PP);
406   return *this;
407 }
408 
409 path::iterator& path::iterator::__decrement() {
410   PathParser PP(__path_ptr_->native(), __entry_, __state_);
411   --PP;
412   __state_ = static_cast<_ParserState>(PP.State_);
413   __entry_ = PP.RawEntry;
414   __stashed_elem_.__assign_view(*PP);
415   return *this;
416 }
417 
418 #if defined(_LIBCPP_WIN32API)
419 ////////////////////////////////////////////////////////////////////////////
420 // Windows path conversions
421 size_t __wide_to_char(const wstring& str, char* out, size_t outlen) {
422   if (str.empty())
423     return 0;
424   ErrorHandler<size_t> err("__wide_to_char", nullptr);
425   UINT codepage     = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
426   BOOL used_default = FALSE;
427   int ret           = WideCharToMultiByte(codepage, 0, str.data(), str.size(), out, outlen, nullptr, &used_default);
428   if (ret <= 0 || used_default)
429     return err.report(errc::illegal_byte_sequence);
430   return ret;
431 }
432 
433 size_t __char_to_wide(const string& str, wchar_t* out, size_t outlen) {
434   if (str.empty())
435     return 0;
436   ErrorHandler<size_t> err("__char_to_wide", nullptr);
437   UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
438   int ret       = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, str.data(), str.size(), out, outlen);
439   if (ret <= 0)
440     return err.report(errc::illegal_byte_sequence);
441   return ret;
442 }
443 #endif
444 
445 _LIBCPP_END_NAMESPACE_FILESYSTEM
446