// xref: /llvm-project/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp (revision 487ab39a5082098f92e886de606505f25031a22a)
#include <stdint.h>

#include <cstring>
#include <string>
3 
// For more information about libc++'s std::string ABI, see:
//
//   https://joellaity.com/2020/01/31/string.html
7 
// A corrupt string which hits the SSO code path, but has an invalid size.
//
// The members are ordered to imitate a short-mode libc++ std::string; main()
// copies these bytes over a real std::string when the two objects have the
// same size. The encoded length (116) is larger than the 23-byte inline
// buffer that follows/precedes it.
static struct {
#if _LIBCPP_ABI_VERSION == 1
  // Set the size of this short-mode string to 116. Note that in short mode,
  // the size is encoded as `size << 1`.
  unsigned char size = 232;

  // 23 bytes for the inline string payload (zero-initialized here).
  char inline_buf[23] = {0};
#else  // _LIBCPP_ABI_VERSION == 1
  // Like above, but data comes first, and use bitfields to indicate size.
  char inline_buf[23] = {0};
  unsigned char size : 7 = 116;
  unsigned char is_long : 1 = 0;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_short_mode;
24 
// A corrupt libcxx string in long mode with a payload that contains a utf8
// sequence that's inherently too long.
static unsigned char garbage_utf8_payload1[] = {
  250, // This means that we expect a 5-byte sequence, this is invalid. LLDB
       // should fall back to ASCII printing.
  250, 250, 250
};

// Fake long-mode string whose data pointer refers to garbage_utf8_payload1
// and whose size equals the payload length (4 bytes).
// NOTE(review): the ABI v1 branch stores cap = 5 while the other branch
// stores cap = 4 -- confirm whether the difference is intentional.
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 5;
  uint64_t size = 4;
  unsigned char *data = &garbage_utf8_payload1[0];
#else  // _LIBCPP_ABI_VERSION == 1
  // Same fields, but data comes first and the long-mode flag is a bitfield.
  unsigned char *data = &garbage_utf8_payload1[0];
  uint64_t size = 4;
  uint64_t cap : 63 = 4;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode1;
44 
// A corrupt libcxx string in long mode with a payload that contains a utf8
// sequence that's too long to fit in the buffer.
static unsigned char garbage_utf8_payload2[] = {
  240, // This means that we expect a 4-byte sequence, but the buffer is too
       // small for this. LLDB should fall back to ASCII printing.
  240
};

// Fake long-mode string whose data pointer refers to garbage_utf8_payload2
// and whose size equals the payload length (2 bytes).
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 3;
  uint64_t size = 2;
  unsigned char *data = &garbage_utf8_payload2[0];
#else  // _LIBCPP_ABI_VERSION == 1
  // Same fields, but data comes first and the long-mode flag is a bitfield.
  unsigned char *data = &garbage_utf8_payload2[0];
  uint64_t size = 2;
  uint64_t cap : 63 = 3;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode2;
64 
// A corrupt libcxx string which has an invalid size (i.e. a size greater than
// the capacity of the string).
//
// The data pointer refers to the literal "foo", while size (7) is larger than
// cap (5).
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 5;
  uint64_t size = 7;
  const char *data = "foo";
#else  // _LIBCPP_ABI_VERSION == 1
  // Same fields, but data comes first and the long-mode flag is a bitfield.
  const char *data = "foo";
  uint64_t size = 7;
  uint64_t cap : 63 = 5;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode3;
79 
// A corrupt libcxx string in long mode with a payload that would trigger a
// buffer overflow.
//
// The data field is a plain integer holding 0xfffffffffffffffe rather than a
// real pointer, so reading `size` (2) bytes from it would run past the top of
// a 64-bit address range.
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 5;
  uint64_t size = 2;
  uint64_t data = 0xfffffffffffffffeULL;
#else  // _LIBCPP_ABI_VERSION == 1
  // Same fields, but data comes first and the long-mode flag is a bitfield.
  uint64_t data = 0xfffffffffffffffeULL;
  uint64_t size = 2;
  uint64_t cap : 63 = 5;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode4;
94 
// Returns the length of `in_str`.
//
// The return statement carries the marker comment for the breakpoint used to
// look at a bad string; main() calls this with a reference obtained by
// dereferencing a null std::string pointer.
size_t touch_string(std::string &in_str)
{
  return in_str.size(); // Break here to look at bad string
}
99 
main()100 int main()
101 {
102     std::wstring wempty(L"");
103     std::wstring s(L"hello world! מזל טוב!");
104     std::wstring S(L"!!!!");
105     const wchar_t *mazeltov = L"מזל טוב";
106     std::string empty("");
107     std::string q("hello world");
108     std::string Q("quite a long std::strin with lots of info inside it");
109     std::string TheVeryLongOne("12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234
56789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890someText1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456
789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890");
110     std::string IHaveEmbeddedZeros("a\0b\0c\0d",7);
111     std::wstring IHaveEmbeddedZerosToo(L"hello world!\0てざ ル゜䋨ミ㠧槊 きゅへ狦穤襩 じゃ馩リョ 䤦監", 38);
112     std::u16string u16_string(u"ß水氶");
113     std::u16string u16_empty(u"");
114     std::u32string u32_string(U"��������");
115     std::u32string u32_empty(U"");
116     std::string *null_str = nullptr;
117 
118     std::string garbage1, garbage2, garbage3, garbage4, garbage5;
119     if (sizeof(std::string) == sizeof(garbage_string_short_mode))
120       memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
121     if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
122       memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
123     if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
124       memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
125     if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
126       memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
127     if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
128       memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
129 
130     S.assign(L"!!!!!"); // Set break point at this line.
131     std::string *not_a_string = (std::string *) 0x0;
132     touch_string(*not_a_string);
133     return 0;
134 }
135