xref: /llvm-project/lldb/include/lldb/Utility/Args.h (revision 089227feaf0efb5e540783a5542655e25669e7d8)
1 //===-- Args.h --------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_ARGS_H
10 #define LLDB_UTILITY_ARGS_H
11 
#include "lldb/Utility/Environment.h"
#include "lldb/lldb-private-types.h"
#include "lldb/lldb-types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
21 
22 namespace lldb_private {
23 
24 /// \class Args Args.h "lldb/Utility/Args.h"
25 /// A command line argument class.
26 ///
27 /// The Args class is designed to be fed a command line. The command line is
28 /// copied into an internal buffer and then split up into arguments. Arguments
29 /// are space delimited if there are no quotes (single, double, or backtick
30 /// quotes) surrounding the argument. Spaces can be escaped using a \
31 /// character to avoid having to surround an argument that contains a space
32 /// with quotes.
33 class Args {
34 public:
35   struct ArgEntry {
36   private:
37     friend class Args;
38 
39     std::unique_ptr<char[]> ptr;
40     char quote = '\0';
41     /// The position of the argument in the original argument string.
42     std::optional<uint16_t> column;
43 
44     char *data() { return ptr.get(); }
45 
46   public:
47     ArgEntry() = default;
48     ArgEntry(llvm::StringRef str, char quote, std::optional<uint16_t> column);
49 
50     llvm::StringRef ref() const { return c_str(); }
51     const char *c_str() const { return ptr.get(); }
52 
53     /// Returns true if this argument was quoted in any way.
54     bool IsQuoted() const { return quote != '\0'; }
55     char GetQuoteChar() const { return quote; }
56     std::optional<uint16_t> GetPos() const { return column; }
57     size_t GetLength() const { return ref().size(); }
58   };
59 
60   /// Construct with an option command string.
61   ///
62   /// \param[in] command
63   ///     A NULL terminated command that will be copied and split up
64   ///     into arguments.
65   ///
66   /// \see Args::SetCommandString(llvm::StringRef)
67   Args(llvm::StringRef command = llvm::StringRef());
68 
69   Args(const Args &rhs);
70   explicit Args(const StringList &list);
71   explicit Args(llvm::ArrayRef<llvm::StringRef> args);
72 
73   Args &operator=(const Args &rhs);
74 
75   /// Destructor.
76   ~Args();
77 
78   explicit Args(const Environment &env) : Args() {
79     SetArguments(const_cast<const char **>(env.getEnvp().get()));
80   }
81 
82   explicit operator Environment() const { return GetConstArgumentVector(); }
83 
84   /// Dump all entries to the stream \a s using label \a label_name.
85   ///
86   /// If label_name is nullptr, the dump operation is skipped.
87   ///
88   /// \param[in] s
89   ///     The stream to which to dump all arguments in the argument
90   ///     vector.
91   /// \param[in] label_name
92   ///     The label_name to use as the label printed for each
93   ///     entry of the args like so:
94   ///       {label_name}[{index}]={value}
95   void Dump(Stream &s, const char *label_name = "argv") const;
96 
97   /// Sets the command string contained by this object.
98   ///
99   /// The command string will be copied and split up into arguments that can
100   /// be accessed via the accessor functions.
101   ///
102   /// \param[in] command
103   ///     A command StringRef that will be copied and split up
104   ///     into arguments.
105   ///
106   /// \see Args::GetArgumentCount() const
107   /// \see Args::GetArgumentAtIndex (size_t) const @see
108   /// Args::GetArgumentVector () \see Args::Shift () \see Args::Unshift (const
109   /// char *)
110   void SetCommandString(llvm::StringRef command);
111 
112   bool GetCommandString(std::string &command) const;
113 
114   bool GetQuotedCommandString(std::string &command) const;
115 
116   /// Gets the number of arguments left in this command object.
117   ///
118   /// \return
119   ///     The number or arguments in this object.
120   size_t GetArgumentCount() const { return m_entries.size(); }
121 
122   bool empty() const { return GetArgumentCount() == 0; }
123 
124   /// Gets the NULL terminated C string argument pointer for the argument at
125   /// index \a idx.
126   ///
127   /// \return
128   ///     The NULL terminated C string argument pointer if \a idx is a
129   ///     valid argument index, NULL otherwise.
130   const char *GetArgumentAtIndex(size_t idx) const;
131 
132   llvm::ArrayRef<ArgEntry> entries() const { return m_entries; }
133 
134   using const_iterator = std::vector<ArgEntry>::const_iterator;
135 
136   const_iterator begin() const { return m_entries.begin(); }
137   const_iterator end() const { return m_entries.end(); }
138 
139   size_t size() const { return GetArgumentCount(); }
140   const ArgEntry &operator[](size_t n) const { return m_entries[n]; }
141 
142   /// Gets the argument vector.
143   ///
144   /// The value returned by this function can be used by any function that
145   /// takes and vector. The return value is just like \a argv in the standard
146   /// C entry point function:
147   ///     \code
148   ///         int main (int argc, const char **argv);
149   ///     \endcode
150   ///
151   /// \return
152   ///     An array of NULL terminated C string argument pointers that
153   ///     also has a terminating NULL C string pointer
154   char **GetArgumentVector();
155 
156   /// Gets the argument vector.
157   ///
158   /// The value returned by this function can be used by any function that
159   /// takes and vector. The return value is just like \a argv in the standard
160   /// C entry point function:
161   ///     \code
162   ///         int main (int argc, const char **argv);
163   ///     \endcode
164   ///
165   /// \return
166   ///     An array of NULL terminate C string argument pointers that
167   ///     also has a terminating NULL C string pointer
168   const char **GetConstArgumentVector() const;
169 
170   /// Gets the argument as an ArrayRef. Note that the return value does *not*
171   /// have a nullptr const char * at the end, as the size of the list is
172   /// embedded in the ArrayRef object.
173   llvm::ArrayRef<const char *> GetArgumentArrayRef() const {
174     return llvm::ArrayRef(m_argv).drop_back();
175   }
176 
177   /// Appends a new argument to the end of the list argument list.
178   ///
179   /// \param[in] arg_str
180   ///     The new argument.
181   ///
182   /// \param[in] quote_char
183   ///     If the argument was originally quoted, put in the quote char here.
184   void AppendArgument(llvm::StringRef arg_str, char quote_char = '\0');
185 
186   void AppendArguments(const Args &rhs);
187 
188   void AppendArguments(const char **argv);
189 
190   /// Insert the argument value at index \a idx to \a arg_str.
191   ///
192   /// \param[in] idx
193   ///     The index of where to insert the argument.
194   ///
195   /// \param[in] arg_str
196   ///     The new argument.
197   ///
198   /// \param[in] quote_char
199   ///     If the argument was originally quoted, put in the quote char here.
200   void InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
201                              char quote_char = '\0');
202 
203   /// Replaces the argument value at index \a idx to \a arg_str if \a idx is
204   /// a valid argument index.
205   ///
206   /// \param[in] idx
207   ///     The index of the argument that will have its value replaced.
208   ///
209   /// \param[in] arg_str
210   ///     The new argument.
211   ///
212   /// \param[in] quote_char
213   ///     If the argument was originally quoted, put in the quote char here.
214   void ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
215                               char quote_char = '\0');
216 
217   /// Deletes the argument value at index
218   /// if \a idx is a valid argument index.
219   ///
220   /// \param[in] idx
221   ///     The index of the argument that will have its value replaced.
222   ///
223   void DeleteArgumentAtIndex(size_t idx);
224 
225   /// Sets the argument vector value, optionally copying all arguments into an
226   /// internal buffer.
227   ///
228   /// Sets the arguments to match those found in \a argv. All argument strings
229   /// will be copied into an internal buffers.
230   //
231   //  FIXME: Handle the quote character somehow.
232   void SetArguments(size_t argc, const char **argv);
233 
234   void SetArguments(const char **argv);
235 
236   /// Shifts the first argument C string value of the array off the argument
237   /// array.
238   ///
239   /// The string value will be freed, so a copy of the string should be made
240   /// by calling Args::GetArgumentAtIndex (size_t) const first and copying the
241   /// returned value before calling Args::Shift().
242   ///
243   /// \see Args::GetArgumentAtIndex (size_t) const
244   void Shift();
245 
246   /// Inserts a class owned copy of \a arg_str at the beginning of the
247   /// argument vector.
248   ///
249   /// A copy \a arg_str will be made.
250   ///
251   /// \param[in] arg_str
252   ///     The argument to push on the front of the argument stack.
253   ///
254   /// \param[in] quote_char
255   ///     If the argument was originally quoted, put in the quote char here.
256   void Unshift(llvm::StringRef arg_str, char quote_char = '\0');
257 
258   /// Clear the arguments.
259   ///
260   /// For re-setting or blanking out the list of arguments.
261   void Clear();
262 
263   static lldb::Encoding
264   StringToEncoding(llvm::StringRef s,
265                    lldb::Encoding fail_value = lldb::eEncodingInvalid);
266 
267   static uint32_t StringToGenericRegister(llvm::StringRef s);
268 
269   static std::string GetShellSafeArgument(const FileSpec &shell,
270                                           llvm::StringRef unsafe_arg);
271 
272   /// EncodeEscapeSequences will change the textual representation of common
273   /// escape sequences like "\n" (two characters) into a single '\n'. It does
274   /// this for all of the supported escaped sequences and for the \0ooo (octal)
275   /// and \xXX (hex). The resulting "dst" string will contain the character
276   /// versions of all supported escape sequences. The common supported escape
277   /// sequences are: "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\'", "\"", "\\".
278   static void EncodeEscapeSequences(const char *src, std::string &dst);
279 
280   /// ExpandEscapeSequences will change a string of possibly non-printable
281   /// characters and expand them into text. So '\n' will turn into two
282   /// characters like "\n" which is suitable for human reading. When a character
283   /// is not printable and isn't one of the common in escape sequences listed in
284   /// the help for EncodeEscapeSequences, then it will be encoded as octal.
285   /// Printable characters are left alone.
286   static void ExpandEscapedCharacters(const char *src, std::string &dst);
287 
288   static std::string EscapeLLDBCommandArgument(const std::string &arg,
289                                                char quote_char);
290 
291 private:
292   std::vector<ArgEntry> m_entries;
293   /// The arguments as C strings with a trailing nullptr element.
294   ///
295   /// These strings are owned by the ArgEntry object in m_entries with the
296   /// same index.
297   std::vector<char *> m_argv;
298 };
299 
300 /// \class OptionsWithRaw Args.h "lldb/Utility/Args.h"
301 /// A pair of an option list with a 'raw' string as a suffix.
302 ///
303 /// This class works similar to Args, but handles the case where we have a
304 /// trailing string that shouldn't be interpreted as a list of arguments but
305 /// preserved as is. It is also only useful for handling command line options
306 /// (e.g. '-foo bar -i0') that start with a dash.
307 ///
308 /// The leading option list is optional. If the first non-space character
309 /// in the string starts with a dash, and the string contains an argument
310 /// that is an unquoted double dash (' -- '), then everything up to the double
311 /// dash is parsed as a list of arguments. Everything after the double dash
312 /// is interpreted as the raw suffix string. Note that the space behind the
313 /// double dash is not part of the raw suffix.
314 ///
315 /// All strings not matching the above format as considered to be just a raw
316 /// string without any options.
317 ///
318 /// \see Args
319 class OptionsWithRaw {
320 public:
321   /// Parse the given string as a list of optional arguments with a raw suffix.
322   ///
323   /// See the class description for a description of the input format.
324   ///
325   /// \param[in] argument_string
326   ///     The string that should be parsed.
327   explicit OptionsWithRaw(llvm::StringRef argument_string);
328 
329   /// Returns true if there are any arguments before the raw suffix.
330   bool HasArgs() const { return m_has_args; }
331 
332   /// Returns the list of arguments.
333   ///
334   /// You can only call this method if HasArgs returns true.
335   Args &GetArgs() {
336     assert(m_has_args);
337     return m_args;
338   }
339 
340   /// Returns the list of arguments.
341   ///
342   /// You can only call this method if HasArgs returns true.
343   const Args &GetArgs() const {
344     assert(m_has_args);
345     return m_args;
346   }
347 
348   /// Returns the part of the input string that was used for parsing the
349   /// argument list. This string also includes the double dash that is used
350   /// for separating the argument list from the suffix.
351   ///
352   /// You can only call this method if HasArgs returns true.
353   llvm::StringRef GetArgStringWithDelimiter() const {
354     assert(m_has_args);
355     return m_arg_string_with_delimiter;
356   }
357 
358   /// Returns the part of the input string that was used for parsing the
359   /// argument list.
360   ///
361   /// You can only call this method if HasArgs returns true.
362   llvm::StringRef GetArgString() const {
363     assert(m_has_args);
364     return m_arg_string;
365   }
366 
367   /// Returns the raw suffix part of the parsed string.
368   const std::string &GetRawPart() const { return m_suffix; }
369 
370 private:
371   void SetFromString(llvm::StringRef arg_string);
372 
373   /// Keeps track if we have parsed and stored any arguments.
374   bool m_has_args = false;
375   Args m_args;
376   llvm::StringRef m_arg_string;
377   llvm::StringRef m_arg_string_with_delimiter;
378 
379   // FIXME: This should be a StringRef, but some of the calling code expect a
380   // C string here so only a real std::string is possible.
381   std::string m_suffix;
382 };
383 
384 } // namespace lldb_private
385 
386 #endif // LLDB_UTILITY_ARGS_H
387