1 //===-- Args.h --------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_UTILITY_ARGS_H 10 #define LLDB_UTILITY_ARGS_H 11 12 #include "lldb/Utility/Environment.h" 13 #include "lldb/lldb-private-types.h" 14 #include "lldb/lldb-types.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/StringExtras.h" 17 #include "llvm/ADT/StringRef.h" 18 #include <string> 19 #include <utility> 20 #include <vector> 21 22 namespace lldb_private { 23 24 /// \class Args Args.h "lldb/Utility/Args.h" 25 /// A command line argument class. 26 /// 27 /// The Args class is designed to be fed a command line. The command line is 28 /// copied into an internal buffer and then split up into arguments. Arguments 29 /// are space delimited if there are no quotes (single, double, or backtick 30 /// quotes) surrounding the argument. Spaces can be escaped using a \ 31 /// character to avoid having to surround an argument that contains a space 32 /// with quotes. 33 class Args { 34 public: 35 struct ArgEntry { 36 private: 37 friend class Args; 38 39 std::unique_ptr<char[]> ptr; 40 char quote = '\0'; 41 /// The position of the argument in the original argument string. 42 std::optional<uint16_t> column; 43 44 char *data() { return ptr.get(); } 45 46 public: 47 ArgEntry() = default; 48 ArgEntry(llvm::StringRef str, char quote, std::optional<uint16_t> column); 49 50 llvm::StringRef ref() const { return c_str(); } 51 const char *c_str() const { return ptr.get(); } 52 53 /// Returns true if this argument was quoted in any way. 54 bool IsQuoted() const { return quote != '\0'; } 55 char GetQuoteChar() const { return quote; } 56 std::optional<uint16_t> GetPos() const { return column; } 57 size_t GetLength() const { return ref().size(); } 58 }; 59 60 /// Construct with an option command string. 61 /// 62 /// \param[in] command 63 /// A NULL terminated command that will be copied and split up 64 /// into arguments. 65 /// 66 /// \see Args::SetCommandString(llvm::StringRef) 67 Args(llvm::StringRef command = llvm::StringRef()); 68 69 Args(const Args &rhs); 70 explicit Args(const StringList &list); 71 explicit Args(llvm::ArrayRef<llvm::StringRef> args); 72 73 Args &operator=(const Args &rhs); 74 75 /// Destructor. 76 ~Args(); 77 78 explicit Args(const Environment &env) : Args() { 79 SetArguments(const_cast<const char **>(env.getEnvp().get())); 80 } 81 82 explicit operator Environment() const { return GetConstArgumentVector(); } 83 84 /// Dump all entries to the stream \a s using label \a label_name. 85 /// 86 /// If label_name is nullptr, the dump operation is skipped. 87 /// 88 /// \param[in] s 89 /// The stream to which to dump all arguments in the argument 90 /// vector. 91 /// \param[in] label_name 92 /// The label_name to use as the label printed for each 93 /// entry of the args like so: 94 /// {label_name}[{index}]={value} 95 void Dump(Stream &s, const char *label_name = "argv") const; 96 97 /// Sets the command string contained by this object. 98 /// 99 /// The command string will be copied and split up into arguments that can 100 /// be accessed via the accessor functions. 101 /// 102 /// \param[in] command 103 /// A command StringRef that will be copied and split up 104 /// into arguments. 105 /// 106 /// \see Args::GetArgumentCount() const 107 /// \see Args::GetArgumentAtIndex (size_t) const @see 108 /// Args::GetArgumentVector () \see Args::Shift () \see Args::Unshift (const 109 /// char *) 110 void SetCommandString(llvm::StringRef command); 111 112 bool GetCommandString(std::string &command) const; 113 114 bool GetQuotedCommandString(std::string &command) const; 115 116 /// Gets the number of arguments left in this command object. 117 /// 118 /// \return 119 /// The number or arguments in this object. 120 size_t GetArgumentCount() const { return m_entries.size(); } 121 122 bool empty() const { return GetArgumentCount() == 0; } 123 124 /// Gets the NULL terminated C string argument pointer for the argument at 125 /// index \a idx. 126 /// 127 /// \return 128 /// The NULL terminated C string argument pointer if \a idx is a 129 /// valid argument index, NULL otherwise. 130 const char *GetArgumentAtIndex(size_t idx) const; 131 132 llvm::ArrayRef<ArgEntry> entries() const { return m_entries; } 133 134 using const_iterator = std::vector<ArgEntry>::const_iterator; 135 136 const_iterator begin() const { return m_entries.begin(); } 137 const_iterator end() const { return m_entries.end(); } 138 139 size_t size() const { return GetArgumentCount(); } 140 const ArgEntry &operator[](size_t n) const { return m_entries[n]; } 141 142 /// Gets the argument vector. 143 /// 144 /// The value returned by this function can be used by any function that 145 /// takes and vector. The return value is just like \a argv in the standard 146 /// C entry point function: 147 /// \code 148 /// int main (int argc, const char **argv); 149 /// \endcode 150 /// 151 /// \return 152 /// An array of NULL terminated C string argument pointers that 153 /// also has a terminating NULL C string pointer 154 char **GetArgumentVector(); 155 156 /// Gets the argument vector. 157 /// 158 /// The value returned by this function can be used by any function that 159 /// takes and vector. The return value is just like \a argv in the standard 160 /// C entry point function: 161 /// \code 162 /// int main (int argc, const char **argv); 163 /// \endcode 164 /// 165 /// \return 166 /// An array of NULL terminate C string argument pointers that 167 /// also has a terminating NULL C string pointer 168 const char **GetConstArgumentVector() const; 169 170 /// Gets the argument as an ArrayRef. Note that the return value does *not* 171 /// have a nullptr const char * at the end, as the size of the list is 172 /// embedded in the ArrayRef object. 173 llvm::ArrayRef<const char *> GetArgumentArrayRef() const { 174 return llvm::ArrayRef(m_argv).drop_back(); 175 } 176 177 /// Appends a new argument to the end of the list argument list. 178 /// 179 /// \param[in] arg_str 180 /// The new argument. 181 /// 182 /// \param[in] quote_char 183 /// If the argument was originally quoted, put in the quote char here. 184 void AppendArgument(llvm::StringRef arg_str, char quote_char = '\0'); 185 186 void AppendArguments(const Args &rhs); 187 188 void AppendArguments(const char **argv); 189 190 /// Insert the argument value at index \a idx to \a arg_str. 191 /// 192 /// \param[in] idx 193 /// The index of where to insert the argument. 194 /// 195 /// \param[in] arg_str 196 /// The new argument. 197 /// 198 /// \param[in] quote_char 199 /// If the argument was originally quoted, put in the quote char here. 200 void InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 201 char quote_char = '\0'); 202 203 /// Replaces the argument value at index \a idx to \a arg_str if \a idx is 204 /// a valid argument index. 205 /// 206 /// \param[in] idx 207 /// The index of the argument that will have its value replaced. 208 /// 209 /// \param[in] arg_str 210 /// The new argument. 211 /// 212 /// \param[in] quote_char 213 /// If the argument was originally quoted, put in the quote char here. 214 void ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 215 char quote_char = '\0'); 216 217 /// Deletes the argument value at index 218 /// if \a idx is a valid argument index. 219 /// 220 /// \param[in] idx 221 /// The index of the argument that will have its value replaced. 222 /// 223 void DeleteArgumentAtIndex(size_t idx); 224 225 /// Sets the argument vector value, optionally copying all arguments into an 226 /// internal buffer. 227 /// 228 /// Sets the arguments to match those found in \a argv. All argument strings 229 /// will be copied into an internal buffers. 230 // 231 // FIXME: Handle the quote character somehow. 232 void SetArguments(size_t argc, const char **argv); 233 234 void SetArguments(const char **argv); 235 236 /// Shifts the first argument C string value of the array off the argument 237 /// array. 238 /// 239 /// The string value will be freed, so a copy of the string should be made 240 /// by calling Args::GetArgumentAtIndex (size_t) const first and copying the 241 /// returned value before calling Args::Shift(). 242 /// 243 /// \see Args::GetArgumentAtIndex (size_t) const 244 void Shift(); 245 246 /// Inserts a class owned copy of \a arg_str at the beginning of the 247 /// argument vector. 248 /// 249 /// A copy \a arg_str will be made. 250 /// 251 /// \param[in] arg_str 252 /// The argument to push on the front of the argument stack. 253 /// 254 /// \param[in] quote_char 255 /// If the argument was originally quoted, put in the quote char here. 256 void Unshift(llvm::StringRef arg_str, char quote_char = '\0'); 257 258 /// Clear the arguments. 259 /// 260 /// For re-setting or blanking out the list of arguments. 261 void Clear(); 262 263 static lldb::Encoding 264 StringToEncoding(llvm::StringRef s, 265 lldb::Encoding fail_value = lldb::eEncodingInvalid); 266 267 static uint32_t StringToGenericRegister(llvm::StringRef s); 268 269 static std::string GetShellSafeArgument(const FileSpec &shell, 270 llvm::StringRef unsafe_arg); 271 272 /// EncodeEscapeSequences will change the textual representation of common 273 /// escape sequences like "\n" (two characters) into a single '\n'. It does 274 /// this for all of the supported escaped sequences and for the \0ooo (octal) 275 /// and \xXX (hex). The resulting "dst" string will contain the character 276 /// versions of all supported escape sequences. The common supported escape 277 /// sequences are: "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\'", "\"", "\\". 278 static void EncodeEscapeSequences(const char *src, std::string &dst); 279 280 /// ExpandEscapeSequences will change a string of possibly non-printable 281 /// characters and expand them into text. So '\n' will turn into two 282 /// characters like "\n" which is suitable for human reading. When a character 283 /// is not printable and isn't one of the common in escape sequences listed in 284 /// the help for EncodeEscapeSequences, then it will be encoded as octal. 285 /// Printable characters are left alone. 286 static void ExpandEscapedCharacters(const char *src, std::string &dst); 287 288 static std::string EscapeLLDBCommandArgument(const std::string &arg, 289 char quote_char); 290 291 private: 292 std::vector<ArgEntry> m_entries; 293 /// The arguments as C strings with a trailing nullptr element. 294 /// 295 /// These strings are owned by the ArgEntry object in m_entries with the 296 /// same index. 297 std::vector<char *> m_argv; 298 }; 299 300 /// \class OptionsWithRaw Args.h "lldb/Utility/Args.h" 301 /// A pair of an option list with a 'raw' string as a suffix. 302 /// 303 /// This class works similar to Args, but handles the case where we have a 304 /// trailing string that shouldn't be interpreted as a list of arguments but 305 /// preserved as is. It is also only useful for handling command line options 306 /// (e.g. '-foo bar -i0') that start with a dash. 307 /// 308 /// The leading option list is optional. If the first non-space character 309 /// in the string starts with a dash, and the string contains an argument 310 /// that is an unquoted double dash (' -- '), then everything up to the double 311 /// dash is parsed as a list of arguments. Everything after the double dash 312 /// is interpreted as the raw suffix string. Note that the space behind the 313 /// double dash is not part of the raw suffix. 314 /// 315 /// All strings not matching the above format as considered to be just a raw 316 /// string without any options. 317 /// 318 /// \see Args 319 class OptionsWithRaw { 320 public: 321 /// Parse the given string as a list of optional arguments with a raw suffix. 322 /// 323 /// See the class description for a description of the input format. 324 /// 325 /// \param[in] argument_string 326 /// The string that should be parsed. 327 explicit OptionsWithRaw(llvm::StringRef argument_string); 328 329 /// Returns true if there are any arguments before the raw suffix. 330 bool HasArgs() const { return m_has_args; } 331 332 /// Returns the list of arguments. 333 /// 334 /// You can only call this method if HasArgs returns true. 335 Args &GetArgs() { 336 assert(m_has_args); 337 return m_args; 338 } 339 340 /// Returns the list of arguments. 341 /// 342 /// You can only call this method if HasArgs returns true. 343 const Args &GetArgs() const { 344 assert(m_has_args); 345 return m_args; 346 } 347 348 /// Returns the part of the input string that was used for parsing the 349 /// argument list. This string also includes the double dash that is used 350 /// for separating the argument list from the suffix. 351 /// 352 /// You can only call this method if HasArgs returns true. 353 llvm::StringRef GetArgStringWithDelimiter() const { 354 assert(m_has_args); 355 return m_arg_string_with_delimiter; 356 } 357 358 /// Returns the part of the input string that was used for parsing the 359 /// argument list. 360 /// 361 /// You can only call this method if HasArgs returns true. 362 llvm::StringRef GetArgString() const { 363 assert(m_has_args); 364 return m_arg_string; 365 } 366 367 /// Returns the raw suffix part of the parsed string. 368 const std::string &GetRawPart() const { return m_suffix; } 369 370 private: 371 void SetFromString(llvm::StringRef arg_string); 372 373 /// Keeps track if we have parsed and stored any arguments. 374 bool m_has_args = false; 375 Args m_args; 376 llvm::StringRef m_arg_string; 377 llvm::StringRef m_arg_string_with_delimiter; 378 379 // FIXME: This should be a StringRef, but some of the calling code expect a 380 // C string here so only a real std::string is possible. 381 std::string m_suffix; 382 }; 383 384 } // namespace lldb_private 385 386 #endif // LLDB_UTILITY_ARGS_H 387