//===-- StringPrinter.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "lldb/DataFormatters/StringPrinter.h"

#include "lldb/Core/Debugger.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"

#include <cstdio>
#include <cstring>
#include <ctype.h>
#include <functional>
#include <locale>
#include <memory>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::formatters;
using GetPrintableElementType = StringPrinter::GetPrintableElementType;
using StringElementType = StringPrinter::StringElementType;

/// DecodedCharBuffer stores the decoded contents of a single character. It
/// avoids managing memory on the heap by copying decoded bytes into an in-line
/// buffer.
34*dda28197Spatrick class DecodedCharBuffer { 35*dda28197Spatrick public: 36*dda28197Spatrick DecodedCharBuffer(std::nullptr_t) {} 37*dda28197Spatrick 38*dda28197Spatrick DecodedCharBuffer(const uint8_t *bytes, size_t size) : m_size(size) { 39*dda28197Spatrick if (size > MaxLength) 40*dda28197Spatrick llvm_unreachable("unsupported length"); 41*dda28197Spatrick memcpy(m_data, bytes, size); 42*dda28197Spatrick } 43*dda28197Spatrick 44*dda28197Spatrick DecodedCharBuffer(const char *bytes, size_t size) 45*dda28197Spatrick : DecodedCharBuffer(reinterpret_cast<const uint8_t *>(bytes), size) {} 46*dda28197Spatrick 47*dda28197Spatrick const uint8_t *GetBytes() const { return m_data; } 48*dda28197Spatrick 49*dda28197Spatrick size_t GetSize() const { return m_size; } 50*dda28197Spatrick 51*dda28197Spatrick private: 52*dda28197Spatrick static constexpr unsigned MaxLength = 16; 53*dda28197Spatrick 54*dda28197Spatrick size_t m_size = 0; 55*dda28197Spatrick uint8_t m_data[MaxLength] = {0}; 56*dda28197Spatrick }; 57*dda28197Spatrick 58*dda28197Spatrick using EscapingHelper = 59*dda28197Spatrick std::function<DecodedCharBuffer(uint8_t *, uint8_t *, uint8_t *&)>; 60061da546Spatrick 61061da546Spatrick // we define this for all values of type but only implement it for those we 62061da546Spatrick // care about that's good because we get linker errors for any unsupported type 63*dda28197Spatrick template <StringElementType type> 64*dda28197Spatrick static DecodedCharBuffer 65*dda28197Spatrick GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 66*dda28197Spatrick StringPrinter::EscapeStyle escape_style); 67061da546Spatrick 68*dda28197Spatrick // Mimic isprint() for Unicode codepoints. 
69*dda28197Spatrick static bool isprint32(char32_t codepoint) { 70061da546Spatrick if (codepoint <= 0x1F || codepoint == 0x7F) // C0 71061da546Spatrick { 72061da546Spatrick return false; 73061da546Spatrick } 74061da546Spatrick if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 75061da546Spatrick { 76061da546Spatrick return false; 77061da546Spatrick } 78061da546Spatrick if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 79061da546Spatrick { 80061da546Spatrick return false; 81061da546Spatrick } 82061da546Spatrick if (codepoint == 0x200E || codepoint == 0x200F || 83061da546Spatrick (codepoint >= 0x202A && 84061da546Spatrick codepoint <= 0x202E)) // bidirectional text control 85061da546Spatrick { 86061da546Spatrick return false; 87061da546Spatrick } 88061da546Spatrick if (codepoint >= 0xFFF9 && 89061da546Spatrick codepoint <= 0xFFFF) // interlinears and generally specials 90061da546Spatrick { 91061da546Spatrick return false; 92061da546Spatrick } 93061da546Spatrick return true; 94061da546Spatrick } 95061da546Spatrick 96*dda28197Spatrick DecodedCharBuffer attemptASCIIEscape(llvm::UTF32 c, 97*dda28197Spatrick StringPrinter::EscapeStyle escape_style) { 98*dda28197Spatrick const bool is_swift_escape_style = 99*dda28197Spatrick escape_style == StringPrinter::EscapeStyle::Swift; 100*dda28197Spatrick switch (c) { 101061da546Spatrick case 0: 102*dda28197Spatrick return {"\\0", 2}; 103061da546Spatrick case '\a': 104*dda28197Spatrick return {"\\a", 2}; 105061da546Spatrick case '\b': 106*dda28197Spatrick if (is_swift_escape_style) 107*dda28197Spatrick return nullptr; 108*dda28197Spatrick return {"\\b", 2}; 109061da546Spatrick case '\f': 110*dda28197Spatrick if (is_swift_escape_style) 111*dda28197Spatrick return nullptr; 112*dda28197Spatrick return {"\\f", 2}; 113061da546Spatrick case '\n': 114*dda28197Spatrick return {"\\n", 2}; 115061da546Spatrick case '\r': 116*dda28197Spatrick return {"\\r", 2}; 117061da546Spatrick case '\t': 118*dda28197Spatrick return 
{"\\t", 2}; 119061da546Spatrick case '\v': 120*dda28197Spatrick if (is_swift_escape_style) 121*dda28197Spatrick return nullptr; 122*dda28197Spatrick return {"\\v", 2}; 123061da546Spatrick case '\"': 124*dda28197Spatrick return {"\\\"", 2}; 125*dda28197Spatrick case '\'': 126*dda28197Spatrick if (is_swift_escape_style) 127*dda28197Spatrick return {"\\'", 2}; 128*dda28197Spatrick return nullptr; 129061da546Spatrick case '\\': 130*dda28197Spatrick return {"\\\\", 2}; 131061da546Spatrick } 132*dda28197Spatrick return nullptr; 133061da546Spatrick } 134061da546Spatrick 135061da546Spatrick template <> 136*dda28197Spatrick DecodedCharBuffer GetPrintableImpl<StringElementType::ASCII>( 137*dda28197Spatrick uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 138*dda28197Spatrick StringPrinter::EscapeStyle escape_style) { 139*dda28197Spatrick // The ASCII helper always advances 1 byte at a time. 140061da546Spatrick next = buffer + 1; 141*dda28197Spatrick 142*dda28197Spatrick DecodedCharBuffer retval = attemptASCIIEscape(*buffer, escape_style); 143*dda28197Spatrick if (retval.GetSize()) 144061da546Spatrick return retval; 145*dda28197Spatrick 146*dda28197Spatrick // Use llvm's locale-independent isPrint(char), instead of the libc 147*dda28197Spatrick // implementation which may give different results on different platforms. 148*dda28197Spatrick if (llvm::isPrint(*buffer)) 149*dda28197Spatrick return {buffer, 1}; 150*dda28197Spatrick 151*dda28197Spatrick unsigned escaped_len; 152*dda28197Spatrick constexpr unsigned max_buffer_size = 7; 153*dda28197Spatrick uint8_t data[max_buffer_size]; 154*dda28197Spatrick switch (escape_style) { 155*dda28197Spatrick case StringPrinter::EscapeStyle::CXX: 156*dda28197Spatrick // Prints 4 characters, then a \0 terminator. 
157*dda28197Spatrick escaped_len = sprintf((char *)data, "\\x%02x", *buffer); 158*dda28197Spatrick break; 159*dda28197Spatrick case StringPrinter::EscapeStyle::Swift: 160*dda28197Spatrick // Prints up to 6 characters, then a \0 terminator. 161*dda28197Spatrick escaped_len = sprintf((char *)data, "\\u{%x}", *buffer); 162*dda28197Spatrick break; 163*dda28197Spatrick } 164*dda28197Spatrick lldbassert(escaped_len > 0 && "unknown string escape style"); 165*dda28197Spatrick return {data, escaped_len}; 166061da546Spatrick } 167061da546Spatrick 168*dda28197Spatrick template <> 169*dda28197Spatrick DecodedCharBuffer GetPrintableImpl<StringElementType::UTF8>( 170*dda28197Spatrick uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 171*dda28197Spatrick StringPrinter::EscapeStyle escape_style) { 172*dda28197Spatrick // If the utf8 encoded length is invalid (i.e., not in the closed interval 173*dda28197Spatrick // [1;4]), or if there aren't enough bytes to print, or if the subsequence 174*dda28197Spatrick // isn't valid utf8, fall back to printing an ASCII-escaped subsequence. 175*dda28197Spatrick if (!llvm::isLegalUTF8Sequence(buffer, buffer_end)) 176*dda28197Spatrick return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 177*dda28197Spatrick escape_style); 178061da546Spatrick 179*dda28197Spatrick // Convert the valid utf8 sequence to a utf32 codepoint. This cannot fail. 180*dda28197Spatrick llvm::UTF32 codepoint = 0; 181*dda28197Spatrick const llvm::UTF8 *buffer_for_conversion = buffer; 182*dda28197Spatrick llvm::ConversionResult result = llvm::convertUTF8Sequence( 183*dda28197Spatrick &buffer_for_conversion, buffer_end, &codepoint, llvm::strictConversion); 184*dda28197Spatrick assert(result == llvm::conversionOK && 185*dda28197Spatrick "Failed to convert legal utf8 sequence"); 186*dda28197Spatrick (void)result; 187*dda28197Spatrick 188*dda28197Spatrick // The UTF8 helper always advances by the utf8 encoded length. 
189*dda28197Spatrick const unsigned utf8_encoded_len = buffer_for_conversion - buffer; 190061da546Spatrick next = buffer + utf8_encoded_len; 191061da546Spatrick 192*dda28197Spatrick DecodedCharBuffer retval = attemptASCIIEscape(codepoint, escape_style); 193*dda28197Spatrick if (retval.GetSize()) 194061da546Spatrick return retval; 195*dda28197Spatrick if (isprint32(codepoint)) 196*dda28197Spatrick return {buffer, utf8_encoded_len}; 197*dda28197Spatrick 198*dda28197Spatrick unsigned escaped_len; 199*dda28197Spatrick constexpr unsigned max_buffer_size = 13; 200*dda28197Spatrick uint8_t data[max_buffer_size]; 201*dda28197Spatrick switch (escape_style) { 202*dda28197Spatrick case StringPrinter::EscapeStyle::CXX: 203*dda28197Spatrick // Prints 10 characters, then a \0 terminator. 204*dda28197Spatrick escaped_len = sprintf((char *)data, "\\U%08x", codepoint); 205*dda28197Spatrick break; 206*dda28197Spatrick case StringPrinter::EscapeStyle::Swift: 207*dda28197Spatrick // Prints up to 12 characters, then a \0 terminator. 
208*dda28197Spatrick escaped_len = sprintf((char *)data, "\\u{%x}", codepoint); 209*dda28197Spatrick break; 210*dda28197Spatrick } 211*dda28197Spatrick lldbassert(escaped_len > 0 && "unknown string escape style"); 212*dda28197Spatrick return {data, escaped_len}; 213061da546Spatrick } 214061da546Spatrick 215061da546Spatrick // Given a sequence of bytes, this function returns: a sequence of bytes to 216061da546Spatrick // actually print out + a length the following unscanned position of the buffer 217061da546Spatrick // is in next 218*dda28197Spatrick static DecodedCharBuffer GetPrintable(StringElementType type, uint8_t *buffer, 219*dda28197Spatrick uint8_t *buffer_end, uint8_t *&next, 220*dda28197Spatrick StringPrinter::EscapeStyle escape_style) { 221*dda28197Spatrick if (!buffer || buffer >= buffer_end) 222061da546Spatrick return {nullptr}; 223061da546Spatrick 224061da546Spatrick switch (type) { 225*dda28197Spatrick case StringElementType::ASCII: 226*dda28197Spatrick return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 227*dda28197Spatrick escape_style); 228*dda28197Spatrick case StringElementType::UTF8: 229*dda28197Spatrick return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next, 230*dda28197Spatrick escape_style); 231061da546Spatrick default: 232061da546Spatrick return {nullptr}; 233061da546Spatrick } 234061da546Spatrick } 235061da546Spatrick 236*dda28197Spatrick static EscapingHelper 237*dda28197Spatrick GetDefaultEscapingHelper(GetPrintableElementType elem_type, 238*dda28197Spatrick StringPrinter::EscapeStyle escape_style) { 239061da546Spatrick switch (elem_type) { 240061da546Spatrick case GetPrintableElementType::UTF8: 241061da546Spatrick case GetPrintableElementType::ASCII: 242*dda28197Spatrick return [escape_style, elem_type](uint8_t *buffer, uint8_t *buffer_end, 243*dda28197Spatrick uint8_t *&next) -> DecodedCharBuffer { 244*dda28197Spatrick return GetPrintable(elem_type == GetPrintableElementType::UTF8 
245*dda28197Spatrick ? StringElementType::UTF8 246*dda28197Spatrick : StringElementType::ASCII, 247*dda28197Spatrick buffer, buffer_end, next, escape_style); 248061da546Spatrick }; 249061da546Spatrick } 250061da546Spatrick llvm_unreachable("bad element type"); 251061da546Spatrick } 252061da546Spatrick 253*dda28197Spatrick /// Read a string encoded in accordance with \tparam SourceDataType from a 254*dda28197Spatrick /// host-side LLDB buffer, then pretty-print it to a stream using \p style. 255061da546Spatrick template <typename SourceDataType> 256*dda28197Spatrick static bool DumpEncodedBufferToStream( 257*dda28197Spatrick GetPrintableElementType style, 258061da546Spatrick llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 259061da546Spatrick const SourceDataType *, 260061da546Spatrick llvm::UTF8 **, llvm::UTF8 *, 261061da546Spatrick llvm::ConversionFlags), 262061da546Spatrick const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 263*dda28197Spatrick assert(dump_options.GetStream() && "need a Stream to print the string to"); 264061da546Spatrick Stream &stream(*dump_options.GetStream()); 265061da546Spatrick if (dump_options.GetPrefixToken() != nullptr) 266061da546Spatrick stream.Printf("%s", dump_options.GetPrefixToken()); 267061da546Spatrick if (dump_options.GetQuote() != 0) 268061da546Spatrick stream.Printf("%c", dump_options.GetQuote()); 269061da546Spatrick auto data(dump_options.GetData()); 270061da546Spatrick auto source_size(dump_options.GetSourceSize()); 271061da546Spatrick if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 272061da546Spatrick const int bufferSPSize = data.GetByteSize(); 273061da546Spatrick if (dump_options.GetSourceSize() == 0) { 274061da546Spatrick const int origin_encoding = 8 * sizeof(SourceDataType); 275061da546Spatrick source_size = bufferSPSize / (origin_encoding / 4); 276061da546Spatrick } 277061da546Spatrick 278061da546Spatrick const SourceDataType *data_ptr = 279061da546Spatrick 
(const SourceDataType *)data.GetDataStart(); 280061da546Spatrick const SourceDataType *data_end_ptr = data_ptr + source_size; 281061da546Spatrick 282061da546Spatrick const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 283061da546Spatrick 284061da546Spatrick if (zero_is_terminator) { 285061da546Spatrick while (data_ptr < data_end_ptr) { 286061da546Spatrick if (!*data_ptr) { 287061da546Spatrick data_end_ptr = data_ptr; 288061da546Spatrick break; 289061da546Spatrick } 290061da546Spatrick data_ptr++; 291061da546Spatrick } 292061da546Spatrick 293061da546Spatrick data_ptr = (const SourceDataType *)data.GetDataStart(); 294061da546Spatrick } 295061da546Spatrick 296061da546Spatrick lldb::DataBufferSP utf8_data_buffer_sp; 297061da546Spatrick llvm::UTF8 *utf8_data_ptr = nullptr; 298061da546Spatrick llvm::UTF8 *utf8_data_end_ptr = nullptr; 299061da546Spatrick 300061da546Spatrick if (ConvertFunction) { 301061da546Spatrick utf8_data_buffer_sp = 302061da546Spatrick std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0); 303061da546Spatrick utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 304061da546Spatrick utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 305061da546Spatrick ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 306061da546Spatrick utf8_data_end_ptr, llvm::lenientConversion); 307061da546Spatrick if (!zero_is_terminator) 308061da546Spatrick utf8_data_end_ptr = utf8_data_ptr; 309061da546Spatrick // needed because the ConvertFunction will change the value of the 310061da546Spatrick // data_ptr. 
311061da546Spatrick utf8_data_ptr = 312061da546Spatrick (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 313061da546Spatrick } else { 314061da546Spatrick // just copy the pointers - the cast is necessary to make the compiler 315061da546Spatrick // happy but this should only happen if we are reading UTF8 data 316061da546Spatrick utf8_data_ptr = const_cast<llvm::UTF8 *>( 317061da546Spatrick reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 318061da546Spatrick utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 319061da546Spatrick reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 320061da546Spatrick } 321061da546Spatrick 322061da546Spatrick const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 323*dda28197Spatrick EscapingHelper escaping_callback; 324*dda28197Spatrick if (escape_non_printables) 325061da546Spatrick escaping_callback = 326*dda28197Spatrick GetDefaultEscapingHelper(style, dump_options.GetEscapeStyle()); 327061da546Spatrick 328061da546Spatrick // since we tend to accept partial data (and even partially malformed data) 329061da546Spatrick // we might end up with no NULL terminator before the end_ptr hence we need 330061da546Spatrick // to take a slower route and ensure we stay within boundaries 331061da546Spatrick for (; utf8_data_ptr < utf8_data_end_ptr;) { 332061da546Spatrick if (zero_is_terminator && !*utf8_data_ptr) 333061da546Spatrick break; 334061da546Spatrick 335061da546Spatrick if (escape_non_printables) { 336061da546Spatrick uint8_t *next_data = nullptr; 337061da546Spatrick auto printable = 338061da546Spatrick escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 339061da546Spatrick auto printable_bytes = printable.GetBytes(); 340061da546Spatrick auto printable_size = printable.GetSize(); 341*dda28197Spatrick 342*dda28197Spatrick // We failed to figure out how to print this string. 
343*dda28197Spatrick if (!printable_bytes || !next_data) 344*dda28197Spatrick return false; 345*dda28197Spatrick 346061da546Spatrick for (unsigned c = 0; c < printable_size; c++) 347061da546Spatrick stream.Printf("%c", *(printable_bytes + c)); 348061da546Spatrick utf8_data_ptr = (uint8_t *)next_data; 349061da546Spatrick } else { 350061da546Spatrick stream.Printf("%c", *utf8_data_ptr); 351061da546Spatrick utf8_data_ptr++; 352061da546Spatrick } 353061da546Spatrick } 354061da546Spatrick } 355061da546Spatrick if (dump_options.GetQuote() != 0) 356061da546Spatrick stream.Printf("%c", dump_options.GetQuote()); 357061da546Spatrick if (dump_options.GetSuffixToken() != nullptr) 358061da546Spatrick stream.Printf("%s", dump_options.GetSuffixToken()); 359061da546Spatrick if (dump_options.GetIsTruncated()) 360061da546Spatrick stream.Printf("..."); 361061da546Spatrick return true; 362061da546Spatrick } 363061da546Spatrick 364061da546Spatrick lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 365061da546Spatrick ReadStringAndDumpToStreamOptions(ValueObject &valobj) 366061da546Spatrick : ReadStringAndDumpToStreamOptions() { 367061da546Spatrick SetEscapeNonPrintables( 368061da546Spatrick valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 369061da546Spatrick } 370061da546Spatrick 371061da546Spatrick lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 372061da546Spatrick ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 373061da546Spatrick : ReadBufferAndDumpToStreamOptions() { 374061da546Spatrick SetEscapeNonPrintables( 375061da546Spatrick valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 376061da546Spatrick } 377061da546Spatrick 378061da546Spatrick lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 379061da546Spatrick ReadBufferAndDumpToStreamOptions( 380061da546Spatrick const ReadStringAndDumpToStreamOptions &options) 381061da546Spatrick : ReadBufferAndDumpToStreamOptions() { 
382061da546Spatrick SetStream(options.GetStream()); 383061da546Spatrick SetPrefixToken(options.GetPrefixToken()); 384061da546Spatrick SetSuffixToken(options.GetSuffixToken()); 385061da546Spatrick SetQuote(options.GetQuote()); 386061da546Spatrick SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 387061da546Spatrick SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 388*dda28197Spatrick SetEscapeStyle(options.GetEscapeStyle()); 389061da546Spatrick } 390061da546Spatrick 391061da546Spatrick namespace lldb_private { 392061da546Spatrick 393061da546Spatrick namespace formatters { 394061da546Spatrick 395061da546Spatrick template <typename SourceDataType> 396*dda28197Spatrick static bool ReadEncodedBufferAndDumpToStream( 397*dda28197Spatrick StringElementType elem_type, 398061da546Spatrick const StringPrinter::ReadStringAndDumpToStreamOptions &options, 399061da546Spatrick llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 400061da546Spatrick const SourceDataType *, 401061da546Spatrick llvm::UTF8 **, llvm::UTF8 *, 402061da546Spatrick llvm::ConversionFlags)) { 403061da546Spatrick assert(options.GetStream() && "need a Stream to print the string to"); 404*dda28197Spatrick if (!options.GetStream()) 405*dda28197Spatrick return false; 406061da546Spatrick 407061da546Spatrick if (options.GetLocation() == 0 || 408061da546Spatrick options.GetLocation() == LLDB_INVALID_ADDRESS) 409061da546Spatrick return false; 410061da546Spatrick 411061da546Spatrick lldb::ProcessSP process_sp(options.GetProcessSP()); 412061da546Spatrick if (!process_sp) 413061da546Spatrick return false; 414061da546Spatrick 415*dda28197Spatrick constexpr int type_width = sizeof(SourceDataType); 416*dda28197Spatrick constexpr int origin_encoding = 8 * type_width; 417061da546Spatrick if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 418061da546Spatrick return false; 419*dda28197Spatrick // If not UTF8 or ASCII, conversion to UTF8 is necessary. 
420061da546Spatrick if (origin_encoding != 8 && !ConvertFunction) 421061da546Spatrick return false; 422061da546Spatrick 423061da546Spatrick bool needs_zero_terminator = options.GetNeedsZeroTermination(); 424061da546Spatrick 425061da546Spatrick bool is_truncated = false; 426061da546Spatrick const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 427061da546Spatrick 428*dda28197Spatrick uint32_t sourceSize; 429*dda28197Spatrick if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) { 430*dda28197Spatrick // FIXME: The NSString formatter sets HasSourceSize(true) when the size is 431*dda28197Spatrick // actually unknown, as well as SetBinaryZeroIsTerminator(false). IIUC the 432*dda28197Spatrick // C++ formatter also sets SetBinaryZeroIsTerminator(false) when it doesn't 433*dda28197Spatrick // mean to. I don't see how this makes sense: we should fix the formatters. 434*dda28197Spatrick // 435*dda28197Spatrick // Until then, the behavior that's expected for ASCII strings with unknown 436*dda28197Spatrick // lengths is to read up to the max size and then null-terminate. Do that. 437061da546Spatrick sourceSize = max_size; 438061da546Spatrick needs_zero_terminator = true; 439*dda28197Spatrick } else if (options.HasSourceSize()) { 440*dda28197Spatrick sourceSize = options.GetSourceSize(); 441*dda28197Spatrick if (!options.GetIgnoreMaxLength()) { 442061da546Spatrick if (sourceSize > max_size) { 443061da546Spatrick sourceSize = max_size; 444061da546Spatrick is_truncated = true; 445061da546Spatrick } 446061da546Spatrick } 447*dda28197Spatrick } else { 448*dda28197Spatrick sourceSize = max_size; 449*dda28197Spatrick needs_zero_terminator = true; 450*dda28197Spatrick } 451061da546Spatrick 452061da546Spatrick const int bufferSPSize = sourceSize * type_width; 453061da546Spatrick lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 454061da546Spatrick 455*dda28197Spatrick // Check if we got bytes. 
We never get any bytes if we have an empty 456*dda28197Spatrick // string, but we still continue so that we end up actually printing 457*dda28197Spatrick // an empty string (""). 458*dda28197Spatrick if (sourceSize != 0 && !buffer_sp->GetBytes()) 459061da546Spatrick return false; 460061da546Spatrick 461061da546Spatrick Status error; 462061da546Spatrick char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 463061da546Spatrick 464*dda28197Spatrick if (elem_type == StringElementType::ASCII) 465*dda28197Spatrick process_sp->ReadCStringFromMemory(options.GetLocation(), buffer, 466*dda28197Spatrick bufferSPSize, error); 467*dda28197Spatrick else if (needs_zero_terminator) 468061da546Spatrick process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 469061da546Spatrick bufferSPSize, error, type_width); 470061da546Spatrick else 471*dda28197Spatrick process_sp->ReadMemoryFromInferior(options.GetLocation(), buffer, 472061da546Spatrick bufferSPSize, error); 473061da546Spatrick if (error.Fail()) { 474061da546Spatrick options.GetStream()->Printf("unable to read data"); 475061da546Spatrick return true; 476061da546Spatrick } 477061da546Spatrick 478061da546Spatrick DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 479061da546Spatrick process_sp->GetAddressByteSize()); 480061da546Spatrick 481061da546Spatrick StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 482061da546Spatrick dump_options.SetData(data); 483061da546Spatrick dump_options.SetSourceSize(sourceSize); 484061da546Spatrick dump_options.SetIsTruncated(is_truncated); 485*dda28197Spatrick dump_options.SetNeedsZeroTermination(needs_zero_terminator); 486*dda28197Spatrick if (needs_zero_terminator) 487*dda28197Spatrick dump_options.SetBinaryZeroIsTerminator(true); 488061da546Spatrick 489*dda28197Spatrick GetPrintableElementType print_style = (elem_type == StringElementType::ASCII) 490*dda28197Spatrick ? 
GetPrintableElementType::ASCII 491*dda28197Spatrick : GetPrintableElementType::UTF8; 492*dda28197Spatrick return DumpEncodedBufferToStream(print_style, ConvertFunction, dump_options); 493061da546Spatrick } 494061da546Spatrick 495061da546Spatrick template <> 496*dda28197Spatrick bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8>( 497061da546Spatrick const ReadStringAndDumpToStreamOptions &options) { 498*dda28197Spatrick return ReadEncodedBufferAndDumpToStream<llvm::UTF8>(StringElementType::UTF8, 499*dda28197Spatrick options, nullptr); 500061da546Spatrick } 501061da546Spatrick 502061da546Spatrick template <> 503*dda28197Spatrick bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16>( 504061da546Spatrick const ReadStringAndDumpToStreamOptions &options) { 505*dda28197Spatrick return ReadEncodedBufferAndDumpToStream<llvm::UTF16>( 506*dda28197Spatrick StringElementType::UTF16, options, llvm::ConvertUTF16toUTF8); 507061da546Spatrick } 508061da546Spatrick 509061da546Spatrick template <> 510*dda28197Spatrick bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32>( 511061da546Spatrick const ReadStringAndDumpToStreamOptions &options) { 512*dda28197Spatrick return ReadEncodedBufferAndDumpToStream<llvm::UTF32>( 513*dda28197Spatrick StringElementType::UTF32, options, llvm::ConvertUTF32toUTF8); 514061da546Spatrick } 515061da546Spatrick 516061da546Spatrick template <> 517*dda28197Spatrick bool StringPrinter::ReadStringAndDumpToStream<StringElementType::ASCII>( 518*dda28197Spatrick const ReadStringAndDumpToStreamOptions &options) { 519*dda28197Spatrick return ReadEncodedBufferAndDumpToStream<char>(StringElementType::ASCII, 520*dda28197Spatrick options, nullptr); 521*dda28197Spatrick } 522*dda28197Spatrick 523*dda28197Spatrick template <> 524*dda28197Spatrick bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8>( 525061da546Spatrick const ReadBufferAndDumpToStreamOptions &options) { 526*dda28197Spatrick return 
DumpEncodedBufferToStream<llvm::UTF8>(GetPrintableElementType::UTF8, 527*dda28197Spatrick nullptr, options); 528061da546Spatrick } 529061da546Spatrick 530061da546Spatrick template <> 531*dda28197Spatrick bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>( 532061da546Spatrick const ReadBufferAndDumpToStreamOptions &options) { 533*dda28197Spatrick return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 534*dda28197Spatrick llvm::ConvertUTF16toUTF8, options); 535*dda28197Spatrick } 536*dda28197Spatrick 537*dda28197Spatrick template <> 538*dda28197Spatrick bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>( 539*dda28197Spatrick const ReadBufferAndDumpToStreamOptions &options) { 540*dda28197Spatrick return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 541*dda28197Spatrick llvm::ConvertUTF32toUTF8, options); 542*dda28197Spatrick } 543*dda28197Spatrick 544*dda28197Spatrick template <> 545*dda28197Spatrick bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII>( 546*dda28197Spatrick const ReadBufferAndDumpToStreamOptions &options) { 547*dda28197Spatrick // Treat ASCII the same as UTF8. 548*dda28197Spatrick // 549*dda28197Spatrick // FIXME: This is probably not the right thing to do (well, it's debatable). 550*dda28197Spatrick // If an ASCII-encoded string happens to contain a sequence of invalid bytes 551*dda28197Spatrick // that forms a valid UTF8 character, we'll print out that character. This is 552*dda28197Spatrick // good if you're playing fast and loose with encodings (probably good for 553*dda28197Spatrick // std::string users), but maybe not so good if you care about your string 554*dda28197Spatrick // formatter respecting the semantics of your selected string encoding. In 555*dda28197Spatrick // the latter case you'd want to see the character byte sequence ('\x..'), not 556*dda28197Spatrick // the UTF8 character itself. 
557061da546Spatrick return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 558061da546Spatrick } 559061da546Spatrick 560061da546Spatrick } // namespace formatters 561061da546Spatrick 562061da546Spatrick } // namespace lldb_private 563