xref: /llvm-project/llvm/include/llvm/Support/Path.h (revision 6c7a53b78fd695ee58ea42d21362f6fa861eb37e)
1 //===- llvm/Support/Path.h - Path Operating System Concept ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the llvm::sys::path namespace. It is designed after
10 // TR2/boost filesystem (v3), but modified to remove exception handling and the
11 // path class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_SUPPORT_PATH_H
16 #define LLVM_SUPPORT_PATH_H
17 
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/ADT/iterator.h"
20 #include "llvm/Support/DataTypes.h"
21 #include <iterator>
22 
23 namespace llvm {
24 namespace sys {
25 namespace path {
26 
/// Selects which platform's path rules a function in this namespace applies.
///
/// is_style_posix() treats \c posix as POSIX, resolves \c native from the
/// host at compile time, and reports every other enumerator as Windows style.
27 enum class Style {
28   native,            ///< Host rules: Windows if _WIN32 is defined, else POSIX.
29   posix,             ///< POSIX path rules.
30   windows_slash,     ///< Windows rules. NOTE(review): presumably prefers '/'; confirm.
31   windows_backslash, ///< Windows rules. NOTE(review): presumably prefers backslash; confirm.
32   windows = windows_backslash, ///< Deprecated alias of windows_backslash.
33 };
34 
35 /// Check if \p S uses POSIX path rules.
36 constexpr bool is_style_posix(Style S) {
37   if (S == Style::posix)
38     return true;
39   if (S != Style::native)
40     return false;
41 #if defined(_WIN32)
42   return false;
43 #else
44   return true;
45 #endif
46 }
47 
48 /// Check if \p S uses Windows path rules.
49 constexpr bool is_style_windows(Style S) { return !is_style_posix(S); }
50 
51 /// @name Lexical Component Iterator
52 /// @{
53 
54 /// Path iterator.
55 ///
56 /// This is an input iterator that iterates over the individual components in
57 /// \a path. The traversal order is as follows:
58 /// * The root-name element, if present.
59 /// * The root-directory element, if present.
60 /// * Each successive filename element, if present.
61 /// * Dot, if one or more trailing non-root slash characters are present.
62 /// Traversing backwards is possible with \a reverse_iterator.
63 ///
64 /// Iteration examples. Each component is separated by ',':
65 /// @code
66 ///   /          => /
67 ///   /foo       => /,foo
68 ///   foo/       => foo,.
69 ///   /foo/bar   => /,foo,bar
70 ///   ../        => ..,.
71 ///   C:\foo\bar => C:,\,foo,bar
72 /// @endcode
///
/// Instances are obtained from the free functions begin() and end(), which are
/// friends of this class.
73 class const_iterator
74     : public iterator_facade_base<const_iterator, std::input_iterator_tag,
75                                   const StringRef> {
76   StringRef Path;          ///< The entire path.
77   StringRef Component;     ///< The current component. Not necessarily in Path.
78   size_t    Position = 0;  ///< The iterator's current position within Path.
79   Style S = Style::native; ///< The path style to use.
80 
81   // An end iterator has Position = Path.size() + 1.
  // NOTE(review): begin()/end() are friends presumably so that they can fill
  // in the private fields directly; their definitions are not in this header.
82   friend const_iterator begin(StringRef path, Style style);
83   friend const_iterator end(StringRef path);
84 
85 public:
  /// Return the current component.
86   reference operator*() const { return Component; }
  /// Advance to the next component (pre-increment form).
87   const_iterator &operator++();    // preincrement
  /// Compare with \p RHS. The equality criteria are defined out of line.
88   bool operator==(const const_iterator &RHS) const;
89 
90   /// Difference in bytes between this and RHS.
91   ptrdiff_t operator-(const const_iterator &RHS) const;
92 };
93 
94 /// Reverse path iterator.
95 ///
96 /// This is an input iterator that iterates over the individual components in
97 /// \a path in reverse order. The traversal order is exactly reversed from that
98 /// of \a const_iterator.
///
/// Instances are obtained from the free functions rbegin() and rend(), which
/// are friends of this class.
99 class reverse_iterator
100     : public iterator_facade_base<reverse_iterator, std::input_iterator_tag,
101                                   const StringRef> {
102   StringRef Path;          ///< The entire path.
103   StringRef Component;     ///< The current component. Not necessarily in Path.
104   size_t    Position = 0;  ///< The iterator's current position within Path.
105   Style S = Style::native; ///< The path style to use.
106 
  // NOTE(review): rbegin()/rend() are friends presumably so that they can fill
  // in the private fields directly; their definitions are not in this header.
107   friend reverse_iterator rbegin(StringRef path, Style style);
108   friend reverse_iterator rend(StringRef path);
109 
110 public:
  /// Return the current component.
111   reference operator*() const { return Component; }
  /// Step to the next component in reverse order (pre-increment form).
112   reverse_iterator &operator++();    // preincrement
  /// Compare with \p RHS. The equality criteria are defined out of line.
113   bool operator==(const reverse_iterator &RHS) const;
114 
115   /// Difference in bytes between this and RHS.
116   ptrdiff_t operator-(const reverse_iterator &RHS) const;
117 };
118 
119 /// Get begin iterator over \a path.
///
/// \a path is annotated LLVM_LIFETIME_BOUND: the returned iterator is expected
/// not to outlive the character data \a path refers to.
///
120 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
121 /// @returns Iterator initialized with the first component of \a path.
122 const_iterator begin(StringRef path LLVM_LIFETIME_BOUND,
123                      Style style = Style::native);
124 
125 /// Get end iterator over \a path.
///
/// \a path is annotated LLVM_LIFETIME_BOUND: the returned iterator is expected
/// not to outlive the character data \a path refers to.
///
126 /// @param path Input path.
127 /// @returns Iterator initialized to the end of \a path.
128 const_iterator end(StringRef path LLVM_LIFETIME_BOUND);
129 
130 /// Get reverse begin iterator over \a path.
///
/// \a path is annotated LLVM_LIFETIME_BOUND: the returned iterator is expected
/// not to outlive the character data \a path refers to.
///
131 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
132 /// @returns Iterator initialized with the first reverse component of \a path.
133 reverse_iterator rbegin(StringRef path LLVM_LIFETIME_BOUND,
134                         Style style = Style::native);
135 
136 /// Get reverse end iterator over \a path.
///
/// \a path is annotated LLVM_LIFETIME_BOUND: the returned iterator is expected
/// not to outlive the character data \a path refers to.
///
137 /// @param path Input path.
138 /// @returns Iterator initialized to the reverse end of \a path.
139 reverse_iterator rend(StringRef path LLVM_LIFETIME_BOUND);
140 
141 /// @}
142 /// @name Lexical Modifiers
143 /// @{
144 
145 /// Remove the last component from \a path unless it is the root dir.
146 ///
147 /// Similar to the POSIX "dirname" utility.
148 ///
149 /// @code
150 ///   directory/filename.cpp => directory/
151 ///   directory/             => directory
152 ///   filename.cpp           => <empty>
153 ///   /                      => /
154 /// @endcode
155 ///
156 /// @param path A path that is modified to not have a file component.
/// @param style Path style to apply; defaults to the host's native style.
157 void remove_filename(SmallVectorImpl<char> &path, Style style = Style::native);
158 
159 /// Replace the file extension of \a path with \a extension.
160 ///
161 /// @code
162 ///   ./filename.cpp => ./filename.extension
163 ///   ./filename     => ./filename.extension
164 ///   ./             => ./.extension
165 /// @endcode
166 ///
167 /// @param path A path that has its extension replaced with \a extension.
168 /// @param extension The extension to be added. It may be empty. It may also
169 ///                  optionally start with a '.'; if it does not, one will be
170 ///                  prepended.
/// @param style Path style to apply; defaults to the host's native style.
171 void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
172                        Style style = Style::native);
173 
174 /// Replace matching path prefix with another path.
175 ///
/// Each example lists \a Path, \a OldPrefix, \a NewPrefix => resulting \a Path.
///
176 /// @code
177 ///   /foo, /old, /new => /foo
178 ///   /old, /old, /new => /new
179 ///   /old, /old/, /new => /old
180 ///   /old/foo, /old, /new => /new/foo
181 ///   /old/foo, /old/, /new => /new/foo
182 ///   /old/foo, /old/, /new/ => /new/foo
183 ///   /oldfoo, /old, /new => /oldfoo
184 ///   /foo, <empty>, /new => /new/foo
185 ///   /foo, <empty>, new => new/foo
186 ///   /old/foo, /old, <empty> => /foo
187 /// @endcode
188 ///
189 /// @param Path If \a Path starts with \a OldPrefix modify to instead
190 ///        start with \a NewPrefix.
191 /// @param OldPrefix The path prefix to strip from \a Path.
192 /// @param NewPrefix The path prefix to replace \a OldPrefix with.
193 /// @param style The style used to match the prefix. Exact match using
194 /// POSIX style, case/separator insensitive match for Windows style.
195 /// @result true if \a Path begins with \a OldPrefix
196 bool replace_path_prefix(SmallVectorImpl<char> &Path, StringRef OldPrefix,
197                          StringRef NewPrefix,
198                          Style style = Style::native);
199 
200 /// Remove redundant leading "./" pieces and consecutive separators.
201 ///
/// \a path is annotated LLVM_LIFETIME_BOUND: the result is expected not to
/// outlive the character data \a path refers to.
///
202 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
203 /// @result The cleaned-up \a path.
204 StringRef remove_leading_dotslash(StringRef path LLVM_LIFETIME_BOUND,
205                                   Style style = Style::native);
206 
207 /// In-place remove any './' and optionally '../' components from a path.
208 ///
209 /// @param path The path to process; it is modified in place.
210 /// @param remove_dot_dot Whether '../' components (except for leading "../")
211 /// should be removed as well. Defaults to false.
/// @param style Path style to apply; defaults to the host's native style.
212 /// @result True if path was changed
213 bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
214                  Style style = Style::native);
215 
216 /// Append to path.
217 ///
218 /// @code
219 ///   /foo  + bar/f => /foo/bar/f
220 ///   /foo/ + bar/f => /foo/bar/f
221 ///   foo   + bar/f => foo/bar/f
222 /// @endcode
223 ///
/// NOTE(review): this overload takes no Style argument; presumably it uses the
/// host's native style. Confirm against the definition.
///
224 /// @param path Set to \a path + \a a (plus \a b, \a c and \a d when given).
225 /// @param a The component to be appended to \a path.
/// @param b,c,d Optional further components; each defaults to "".
///        NOTE(review): presumably appended in that order after \a a.
226 void append(SmallVectorImpl<char> &path, const Twine &a,
227                                          const Twine &b = "",
228                                          const Twine &c = "",
229                                          const Twine &d = "");
230 
/// Append to path, with an explicit path style.
///
/// NOTE(review): presumably behaves like the append() overload without a
/// Style argument, but using \a style; confirm against the definition.
///
/// @param path The path that is appended to, in place.
/// @param style The path style to use.
/// @param a The component to be appended to \a path.
/// @param b,c,d Optional further components; each defaults to "".
231 void append(SmallVectorImpl<char> &path, Style style, const Twine &a,
232             const Twine &b = "", const Twine &c = "", const Twine &d = "");
233 
234 /// Append to path.
235 ///
236 /// @code
237 ///   /foo  + [bar,f] => /foo/bar/f
238 ///   /foo/ + [bar,f] => /foo/bar/f
239 ///   foo   + [bar,f] => foo/bar/f
240 /// @endcode
241 ///
242 /// @param path Set to \a path + [\a begin, \a end).
243 /// @param begin Start of components to append.
244 /// @param end One past the end of components to append.
/// @param style Path style to apply; defaults to the host's native style.
245 void append(SmallVectorImpl<char> &path, const_iterator begin,
246             const_iterator end, Style style = Style::native);
247 
248 /// @}
249 /// @name Transforms (or some other better name)
250 /// @{
251 
252 /// Convert path to the native form. This is used to give paths to users and
253 /// operating system calls in the platform's normal way. For example, on Windows
254 /// all '/' are converted to '\'. On Unix, it converts all '\' to '/'.
255 ///
256 /// @param path A path that is transformed to native format.
257 /// @param result Holds the result of the transformation.
/// @param style Path style to apply; defaults to the host's native style.
258 void native(const Twine &path, SmallVectorImpl<char> &result,
259             Style style = Style::native);
260 
261 /// Convert path to the native form in place. This is used to give paths to
262 /// users and operating system calls in the platform's normal way. For example,
263 /// on Windows all '/' are converted to '\'.
264 ///
265 /// @param path A path that is transformed to native format.
/// @param style Path style to apply; defaults to the host's native style.
266 void native(SmallVectorImpl<char> &path, Style style = Style::native);
267 
268 /// For Windows path styles, convert path to use the preferred path separators.
269 /// For other styles, do nothing.
270 ///
271 /// @param path A path that is transformed to preferred format.
272 inline void make_preferred(SmallVectorImpl<char> &path,
273                            Style style = Style::native) {
274   if (!is_style_windows(style))
275     return;
276   native(path, style);
277 }
278 
279 /// Replaces backslashes with slashes if Windows.
280 ///
281 /// @param path The path to process.
/// @param style Path style to apply; defaults to the host's native style.
282 /// @result The result of replacing backslashes with forward slashes if Windows.
283 /// On Unix, this function is a no-op because backslashes are valid path
284 /// characters.
285 std::string convert_to_slash(StringRef path, Style style = Style::native);
286 
287 /// @}
288 /// @name Lexical Observers
289 /// @{
290 
291 /// Get root name.
292 ///
293 /// @code
294 ///   //net/hello => //net
295 ///   c:/hello    => c: (on Windows, on other platforms nothing)
296 ///   /hello      => <empty>
297 /// @endcode
298 ///
299 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
300 /// @result The root name of \a path if it has one, otherwise "".
301 StringRef root_name(StringRef path LLVM_LIFETIME_BOUND,
302                     Style style = Style::native);
303 
304 /// Get root directory.
305 ///
306 /// @code
307 ///   /goo/hello => /
308 ///   c:/hello   => /
309 ///   d/file.txt => <empty>
310 /// @endcode
311 ///
312 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
313 /// @result The root directory of \a path if it has one, otherwise
314 ///               "".
315 StringRef root_directory(StringRef path LLVM_LIFETIME_BOUND,
316                          Style style = Style::native);
317 
318 /// Get root path.
319 ///
320 /// Equivalent to root_name + root_directory.
321 ///
322 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
323 /// @result The root path of \a path if it has one, otherwise "".
324 StringRef root_path(StringRef path LLVM_LIFETIME_BOUND,
325                     Style style = Style::native);
326 
327 /// Get relative path.
328 ///
329 /// @code
330 ///   C:\hello\world => hello\world
331 ///   foo/bar        => foo/bar
332 ///   /foo/bar       => foo/bar
333 /// @endcode
334 ///
335 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
336 /// @result The part of \a path that follows its root_path. As the examples
///         show, a path without a root path is returned unchanged.
///         NOTE(review): presumably "" when nothing follows the root path.
337 StringRef relative_path(StringRef path LLVM_LIFETIME_BOUND,
338                         Style style = Style::native);
339 
340 /// Get parent path.
341 ///
342 /// @code
343 ///   /          => <empty>
344 ///   /foo       => /
345 ///   foo/../bar => foo/..
346 /// @endcode
347 ///
348 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
349 /// @result The parent path of \a path if one exists, otherwise "".
350 StringRef parent_path(StringRef path LLVM_LIFETIME_BOUND,
351                       Style style = Style::native);
352 
353 /// Get filename.
354 ///
355 /// @code
356 ///   /foo.txt => foo.txt
357 ///   .        => .
358 ///   ..       => ..
359 ///   /        => /
360 /// @endcode
361 ///
362 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
363 /// @result The filename part of \a path. This is defined as the last component
364 ///         of \a path. Similar to the POSIX "basename" utility.
365 StringRef filename(StringRef path LLVM_LIFETIME_BOUND,
366                    Style style = Style::native);
367 
368 /// Get stem.
369 ///
370 /// If filename contains a dot but not solely one or two dots, result is the
371 /// substring of filename ending at (but not including) the last dot. Otherwise
372 /// it is filename.
373 ///
374 /// @code
375 ///   /foo/bar.txt => bar
376 ///   /foo/bar     => bar
377 ///   /foo/.txt    => <empty>
378 ///   /foo/.       => .
379 ///   /foo/..      => ..
380 /// @endcode
381 ///
382 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
383 /// @result The stem of \a path.
384 StringRef stem(StringRef path LLVM_LIFETIME_BOUND, Style style = Style::native);
385 
386 /// Get extension.
387 ///
388 /// If filename contains a dot but not solely one or two dots, result is the
389 /// substring of filename starting at (and including) the last dot, and ending
390 /// at the end of \a path. Otherwise "".
391 ///
392 /// @code
393 ///   /foo/bar.txt => .txt
394 ///   /foo/bar     => <empty>
395 ///   /foo/.txt    => .txt
396 /// @endcode
397 ///
398 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
399 /// @result The extension of \a path.
400 StringRef extension(StringRef path LLVM_LIFETIME_BOUND,
401                     Style style = Style::native);
402 
403 /// Check whether the given char is a path separator for the given path style
/// (by default the native style, i.e. that of the host OS).
404 ///
405 /// @param value a character
/// @param style Path style to apply; defaults to the host's native style.
406 /// @result true if \a value is a path separator character for \a style
407 bool is_separator(char value, Style style = Style::native);
408 
409 /// Return the preferred separator for the given path style (by default the
/// native style, i.e. that of this platform).
410 ///
/// @param style Path style to apply; defaults to the host's native style.
411 /// @result StringRef of the preferred separator, null-terminated.
412 StringRef get_separator(Style style = Style::native);
413 
414 /// Get the typical temporary directory for the system, e.g.,
415 /// "/var/tmp" or "C:/TEMP".
416 ///
417 /// @param erasedOnReboot Whether to favor a path that is erased on reboot
418 /// rather than one that potentially persists longer. This parameter will be
419 /// ignored if the user or system has set the typical environment variable
420 /// (e.g., TEMP on Windows, TMPDIR on *nix) to specify a temporary directory.
421 ///
422 /// @param result Holds the resulting path name.
423 void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result);
424 
425 /// Get the user's home directory.
426 ///
427 /// @param result Holds the resulting path name.
///        NOTE(review): its contents when false is returned are not specified
///        here; check the definition before relying on them.
428 /// @result True if a home directory is set, false otherwise.
429 bool home_directory(SmallVectorImpl<char> &result);
430 
431 /// Get the directory where packages should read user-specific configurations,
432 /// e.g. $XDG_CONFIG_HOME.
433 ///
434 /// @param result Holds the resulting path name.
435 /// @result True if the appropriate path was determined; the path itself need
///         not exist.
436 bool user_config_directory(SmallVectorImpl<char> &result);
437 
438 /// Get the directory where installed packages should put their
439 /// machine-local cache, e.g. $XDG_CACHE_HOME.
440 ///
441 /// @param result Holds the resulting path name.
442 /// @result True if the appropriate path was determined; the path itself need
///         not exist.
443 bool cache_directory(SmallVectorImpl<char> &result);
444 
445 /// Has root name?
446 ///
447 /// root_name != ""
448 ///
449 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
450 /// @result True if the path has a root name, false otherwise.
451 bool has_root_name(const Twine &path, Style style = Style::native);
452 
453 /// Has root directory?
454 ///
455 /// root_directory != ""
456 ///
457 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
458 /// @result True if the path has a root directory, false otherwise.
459 bool has_root_directory(const Twine &path, Style style = Style::native);
460 
461 /// Has root path?
462 ///
463 /// root_path != ""
464 ///
465 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
466 /// @result True if the path has a root path, false otherwise.
467 bool has_root_path(const Twine &path, Style style = Style::native);
468 
469 /// Has relative path?
470 ///
471 /// relative_path != ""
472 ///
473 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
474 /// @result True if the path has a relative path, false otherwise.
475 bool has_relative_path(const Twine &path, Style style = Style::native);
476 
477 /// Has parent path?
478 ///
479 /// parent_path != ""
480 ///
481 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
482 /// @result True if the path has a parent path, false otherwise.
483 bool has_parent_path(const Twine &path, Style style = Style::native);
484 
485 /// Has filename?
486 ///
487 /// filename != ""
488 ///
489 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
490 /// @result True if the path has a filename, false otherwise.
491 bool has_filename(const Twine &path, Style style = Style::native);
492 
493 /// Has stem?
494 ///
495 /// stem != ""
496 ///
497 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
498 /// @result True if the path has a stem, false otherwise.
499 bool has_stem(const Twine &path, Style style = Style::native);
500 
501 /// Has extension?
502 ///
503 /// extension != ""
504 ///
505 /// @param path Input path.
/// @param style Path style to apply; defaults to the host's native style.
506 /// @result True if the path has an extension, false otherwise.
507 bool has_extension(const Twine &path, Style style = Style::native);
508 
509 /// Is path absolute?
510 ///
511 /// According to cppreference.com, C++17 states: "An absolute path is a path
512 /// that unambiguously identifies the location of a file without reference to
513 /// an additional starting location."
514 ///
515 /// In other words, the rules are:
516 /// 1) POSIX style paths with nonempty root directory are absolute.
517 /// 2) Windows style paths with nonempty root name and root directory are
518 ///    absolute.
519 /// 3) No other paths are absolute.
520 ///
521 /// \see has_root_name
522 /// \see has_root_directory
523 ///
524 /// @param path Input path.
/// @param style The style of \p path (e.g. Windows or POSIX). "native" style
/// means to derive the style from the host.
525 /// @result True if the path is absolute, false if it is not.
526 bool is_absolute(const Twine &path, Style style = Style::native);
527 
528 /// Is path absolute using GNU rules?
529 ///
530 /// GNU rules are:
531 /// 1) Paths starting with a path separator are absolute.
532 /// 2) Windows style paths are also absolute if they start with a character
533 ///    followed by ':'.
534 /// 3) No other paths are absolute.
535 ///
536 /// In Windows style, the path "C:\Users\Default" has "C:" as root name and "\"
537 /// as root directory.
538 ///
539 /// Hence "C:" on Windows is absolute under GNU rules but not absolute under
540 /// C++17, because it has no root directory. Likewise "/" and "\" on Windows
541 /// are absolute under GNU but not under C++17, due to the empty root name.
542 ///
543 /// \see has_root_name
544 /// \see has_root_directory
545 ///
546 /// @param path Input path.
547 /// @param style The style of \p path (e.g. Windows or POSIX). "native" style
548 /// means to derive the style from the host.
549 /// @result True if the path is absolute following GNU rules, false if it is
550 /// not.
551 bool is_absolute_gnu(const Twine &path, Style style = Style::native);
552 
553 /// Is path relative?
554 ///
/// NOTE(review): presumably the negation of is_absolute(); confirm against the
/// definition.
///
555 /// @param path Input path.
/// @param style The style of \p path (e.g. Windows or POSIX). "native" style
/// means to derive the style from the host.
556 /// @result True if the path is relative, false if it is not.
557 bool is_relative(const Twine &path, Style style = Style::native);
558 
559 } // end namespace path
560 } // end namespace sys
561 } // end namespace llvm
562 
563 #endif
564