xref: /llvm-project/lldb/examples/summaries/cocoa/CFString.py (revision fd35a92300a00edaf56ae94176317390677569a4)
1"""
2LLDB AppKit formatters
3
4Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5See https://llvm.org/LICENSE.txt for license information.
6SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7"""
8# example synthetic children and summary provider for CFString (and related NSString class)
9# the real code is part of the LLDB core
10import lldb
11import lldb.runtime.objc.objc_runtime
12import lldb.formatters.Logger
13
14try:
15    unichr
16except NameError:
17    unichr = chr
18
19
def CFString_SummaryProvider(valobj, dict):
    """Build the summary string shown for an NSString / CFString value.

    A CFStringSynthProvider is created for *valobj* and asked for its
    "content" child.  That child is either an lldb.SBValue (its own summary
    is used) or plain text read out of the inferior's memory (it is wrapped
    in double quotes here).

    Args:
        valobj: the lldb.SBValue to summarize.
        dict: passed through unchanged to CFStringSynthProvider.

    Returns:
        "" when the provider could not read the string's info bits,
        otherwise "@" followed by the content summary, or
        "@<variable is not NSString>" when no content could be produced.
    """
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ""
    try:
        summary = provider.get_child_at_index(provider.get_child_index("content"))
        if isinstance(summary, lldb.SBValue):
            summary = summary.GetSummary()
        elif summary is not None:
            # On Python 3 the provider may hand back UTF-8 encoded bytes;
            # concatenating them with str would raise, so decode first.
            # (On Python 2 bytes is str and this branch is a no-op.)
            if isinstance(summary, bytes) and not isinstance(summary, str):
                summary = summary.decode("utf-8", "replace")
            summary = '"' + summary + '"'
    except Exception:
        # any failure while reading the inferior means "no usable content"
        summary = None
    if summary is None:
        summary = "<variable is not NSString>"
    return "@" + summary
36
37
def CFAttributedString_SummaryProvider(valobj, dict):
    """Build the summary string shown for an NSAttributedString value.

    The backing string pointer is read one pointer-size past the start of
    the object, a CFStringSynthProvider is run on the string it points to,
    and that provider's "content" child becomes the summary.

    Args:
        valobj: the lldb.SBValue to summarize.
        dict: passed through unchanged to CFStringSynthProvider.

    Returns:
        "@" followed by the content summary, or
        "@<variable is not NSAttributedString>" when the pointer is null or
        the content cannot be obtained.
    """
    fallback = "<variable is not NSAttributedString>"
    summary = None
    pointee = valobj.GetValueAsUnsigned(0)
    if pointee:
        # skip one pointer-sized field to reach the backing string pointer
        pointee = pointee + valobj.GetTarget().GetProcess().GetAddressByteSize()
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType()
        )
        child = child_ptr.CreateValueFromAddress(
            "string_data", child_ptr.GetValueAsUnsigned(), valobj.GetType()
        ).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                content = provider.get_child_at_index(
                    provider.get_child_index("content")
                )
                if isinstance(content, lldb.SBValue):
                    summary = content.GetSummary()
                elif content is not None:
                    # the provider returns plain text (bytes on Python 3)
                    # for strings it decoded itself; quote it the same way
                    # CFString_SummaryProvider does
                    if isinstance(content, bytes) and not isinstance(content, str):
                        content = content.decode("utf-8", "replace")
                    summary = '"' + content + '"'
            except Exception:
                summary = None
    if summary is None:
        summary = fallback
    return "@" + summary
62
63
def __lldb_init_module(debugger, dict):
    """Module-load hook: register the two summary providers of this file.

    Issues one `type summary add` command per provider through
    debugger.HandleCommand.  `dict` is not used.
    """
    registrations = (
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef",
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString",
    )
    for command in registrations:
        debugger.HandleCommand(command)
71
72
class CFStringSynthProvider:
    """Example synthetic-children provider for CFString / NSString values.

    The provider reads the info bits stored in the object's header, turns
    them into a handful of boolean flags and uses those flags to choose a
    reader that fetches the string contents from the inferior's memory.

    Six children are exposed, in this order: "mutable", "inline",
    "explicit", "unicode", "special" and "content".  Children other than
    "content" are for debugging only and must not be used in production
    code.
    """

    # names of the flag children in child-index order; "content" comes last
    _FLAG_NAMES = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        """Remember *valobj* and compute its flags; `dict` is not used."""
        self.valobj = valobj
        self.update()

    def num_children(self):
        """Return 6, or 0 when the info bits could not be read."""
        if self.invalid:
            return 0
        return 6

    @staticmethod
    def _as_text(value):
        """Return *value* as the interpreter's native `str` type.

        Python 3 bytes are decoded from UTF-8 and Python 2 unicode objects
        are encoded to UTF-8, so the result can always be concatenated with
        ordinary string literals.
        """
        if isinstance(value, str):
            return value
        if isinstance(value, bytes):
            return value.decode("utf-8", "replace")
        return value.encode("utf-8")

    @staticmethod
    def _decode_utf16(raw, little_endian):
        """Decode a bytes-like buffer of UTF-16 code units into text.

        Surrogate pairs are combined into a single character; malformed
        sequences are replaced rather than raising.
        """
        codec = "utf-16-le" if little_endian else "utf-16-be"
        return bytes(raw).decode(codec, "replace")

    @classmethod
    def _c_string_literal(cls, text):
        """Return *text* as a double-quoted C string literal.

        Backslashes, double quotes and line breaks are escaped so that text
        read from the inferior cannot break the expression it is pasted
        into.
        """
        escaped = cls._as_text(text)
        # the backslash must be handled first so later escapes stay intact
        for plain, quoted in (
            ("\\", "\\\\"),
            ('"', '\\"'),
            ("\n", "\\n"),
            ("\r", "\\r"),
        ):
            escaped = escaped.replace(plain, quoted)
        return '"' + escaped + '"'

    def _char_type(self):
        """Return the `char` basic type, looked up through valobj's type."""
        return self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string from the inferior.

        Args:
            pointer: address of the first code unit.
            max_len: maximum number of 16-bit code units (not bytes) to read.

        Returns:
            The decoded text, without the terminator.

        Raises:
            RuntimeError: when a code unit cannot be read from memory.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        remaining = max_len
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while remaining > 0:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or content is None or len(content) < 2:
                raise RuntimeError(
                    "cannot read UTF-16 code unit at 0x%x" % pointer
                )
            unit = bytearray(content)
            pointer = pointer + 2
            if unit[0] == 0 and unit[1] == 0:
                break
            raw.extend(unit)
            remaining = remaining - 1
        # byte order follows the target's endianness
        # (do we really need this or is Cocoa going to
        #  use Windows-compatible little-endian even
        #  if the target is big endian?)
        return self._decode_utf16(raw, self.is_little)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Return the content of a "special" string (info bits all zero)."""
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        pystr = self.read_unicode(pointer)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)" + self._c_string_literal(pystr)
        )

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Return the content built from valobj's object description."""
        return self.valobj.CreateValueFromExpression(
            "content",
            "(char*)" + self._c_string_literal(self.valobj.GetObjectDescription()),
        )

    def handle_unicode_string(self):
        """Return the content of a string whose unicode flag is set.

        Returns plain text read from memory, or an SBValue when one of the
        fallback paths is taken.
        """
        # step 1: find offset
        pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # a full pointer is skipped here before getting to the live
            # data
            pointer = pointer + self.pointer_size
        else:
            # read a pointer-sized value here and make an address out of it
            try:
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, self._char_type().GetPointerType()
                )
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                return self.valobj.CreateValueFromExpression(
                    "content",
                    "(char*)" + self._c_string_literal('@"invalid NSString"'),
                )
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it as native text
        return self._as_text(pystr)

    def handle_inline_explicit(self):
        """Return the content of an inline, explicit-length, non-unicode string."""
        address = 3 * self.pointer_size + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(address) + ")"
        )

    def handle_mutable_string(self):
        """Return the content of a mutable string.

        The data pointer is read two pointer-sizes into the object.  Unicode
        data with an explicit length is decoded here and returned as plain
        text; otherwise an SBValue pointing one byte past the data pointer
        is returned.
        """
        data = self.valobj.CreateChildAtOffset(
            "content",
            2 * self.pointer_size,
            self._char_type().GetPointerType(),
        )
        data_value = data.GetValueAsUnsigned(0)
        if self.explicit and self.unicode:
            return self._as_text(self.read_unicode(data_value))
        data_value = data_value + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")"
        )

    def handle_UTF8_inline(self):
        """Return the content of an inline non-unicode string."""
        offset = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if not self.explicit:
            # a length byte precedes the data when the flag is False
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content", offset, self._char_type()
        ).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return the content of a non-inline, non-unicode string."""
        return self.valobj.CreateChildAtOffset(
            "content",
            self.size_of_cfruntime_base(),
            self._char_type().GetPointerType(),
        )

    def get_child_at_index(self, index):
        """Return the child at *index*.

        Indices 0-4 are the flag children, each created from the flag's
        integer value.  Index 5 is the string content, which is an SBValue
        or plain text depending on the reader that was used.  Any other
        index yields None.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child [" + str(index) + "]"
        for position, flag in enumerate(self._FLAG_NAMES):
            if index == position:
                return self.valobj.CreateValueFromExpression(
                    flag, str(int(getattr(self, flag)))
                )
        if index == 5:
            # we are handling the several possible combinations of flags.
            # for each known combination we have a function that knows how to
            # go fetch the data from memory instead of running code. if a string is not
            # correctly displayed, one should start by finding a combination of flags that
            # makes it different from these known cases, and provide a new reader function
            # if this is not possible, a new flag might have to be made up (like the "special" flag
            # below, which is not a real flag in CFString), or alternatively one might need to use
            # the ObjC runtime helper to detect the new class and deal with it accordingly
            if self.mutable:
                return self.handle_mutable_string()
            if (
                self.inline
                and self.explicit
                and not self.unicode
                and not self.special
            ):
                return self.handle_inline_explicit()
            if self.unicode:
                return self.handle_unicode_string()
            if self.special:
                return self.handle_special()
            if self.inline:
                return self.handle_UTF8_inline()
            return self.handle_UTF8_not_inline()
        return None

    def get_child_index(self, name):
        """Return the index of the child called *name*.

        "content" maps to num_children() - 1; unknown names yield None.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child ['" + str(name) + "']"
        if name == "content":
            return self.num_children() - 1
        if name in self._FLAG_NAMES:
            return self._FLAG_NAMES.index(name)
        return None

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes used for the CFRuntimeBase header."""
        return self.pointer_size + 4 + (4 if self.is_64_bit else 0)

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits inside the object."""
        offset = self.pointer_size
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info byte and record whether that succeeded.

        Sets self.invalid and returns the byte as an int, or None when the
        value could not be read.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self._char_type(),
        )
        # NOTE(review): 11 is presumably lldb.eFormatHex - confirm; the value
        # is parsed below with int(..., 0), which accepts a 0x prefix
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        self.invalid = info is None
        if info is None:
            return None
        return int(info, 0)

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when neither of the 0x60 bits is set."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        """Return True when all info bits are zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when the 0x10 bit is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache the process's pointer size, 64-bit-ness and byte order."""
        process = self.valobj.GetTarget().GetProcess()
        self.pointer_size = process.GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = process.GetByteOrder() == lldb.eByteOrderLittle

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the flag attributes from them.

        When the info bits cannot be read the flag attributes are left
        unset and self.invalid is True.
        """
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture information and flags."""
        self.adjust_for_architecture()
        self.compute_flags()
378