xref: /llvm-project/lldb/examples/summaries/cocoa/CFString.py (revision b9c1b51e45b845debb76d8658edabca70ca56079)
1"""
2LLDB AppKit formatters
3
4part of The LLVM Compiler Infrastructure
5This file is distributed under the University of Illinois Open Source
6License. See LICENSE.TXT for details.
7"""
8# example synthetic children and summary provider for CFString (and related NSString class)
9# the real code is part of the LLDB core
10import lldb
11import lldb.runtime.objc.objc_runtime
12import lldb.formatters.Logger
13
14
def CFString_SummaryProvider(valobj, dict):
    """Return the summary string for an NSString / CFString value.

    A CFStringSynthProvider is built over ``valobj`` and its "content"
    child is fetched.  That child is either an lldb.SBValue (its own
    summary is used) or a plain Python string (it is wrapped in double
    quotes).

    Returns:
        '' when the provider flags the object as invalid, otherwise '@'
        followed by the string summary, or by '<variable is not NSString>'
        when no summary could be produced.
    """
    logger = lldb.formatters.Logger.Logger()
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    try:
        content = provider.get_child_at_index(
            provider.get_child_index("content"))
        if isinstance(content, lldb.SBValue):
            summary = content.GetSummary()
        else:
            # On Python 3 UTF-8 encoded content is a bytes object and cannot
            # be concatenated with str; on Python 2 bytes is str and this
            # branch is skipped.
            if isinstance(content, bytes) and not isinstance(content, str):
                content = content.decode('utf-8', 'replace')
            summary = '"' + content + '"'
    except Exception:
        # best effort: any failure while reading the string is reported as
        # "not an NSString", but KeyboardInterrupt/SystemExit still propagate
        summary = None
    if summary is None:
        summary = '<variable is not NSString>'
    return '@' + summary
32
33
def CFAttributedString_SummaryProvider(valobj, dict):
    """Return the summary string for an NSAttributedString value.

    The pointer stored one pointer-size past the start of the object is
    followed, and the object found there is handed to
    CFStringSynthProvider; its "content" child supplies the summary.

    Returns:
        '@' followed by the string summary, or by
        '<variable is not NSAttributedString>' when the pointer is null or
        the string cannot be read.
    """
    logger = lldb.formatters.Logger.Logger()
    fallback = '<variable is not NSAttributedString>'
    pointer_size = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    if not pointee:
        return '@' + fallback

    # the slot right after the first pointer-sized field holds the address
    # of the string object
    string_ptr = valobj.CreateValueFromAddress(
        "string_ptr", pointee + pointer_size, valobj.GetType())
    string_obj = string_ptr.CreateValueFromAddress(
        "string_data",
        string_ptr.GetValueAsUnsigned(),
        valobj.GetType()).AddressOf()

    summary = fallback
    provider = CFStringSynthProvider(string_obj, dict)
    if not provider.invalid:
        try:
            content = provider.get_child_at_index(
                provider.get_child_index("content"))
            if isinstance(content, lldb.SBValue):
                summary = content.GetSummary()
            else:
                # some provider code paths return the text itself rather
                # than an SBValue; quote it the way CFString_SummaryProvider
                # does instead of failing on a missing GetSummary()
                if isinstance(content, bytes) and \
                        not isinstance(content, str):
                    content = content.decode('utf-8', 'replace')
                summary = '"' + content + '"'
        except Exception:
            summary = fallback
    if summary is None:
        summary = fallback
    return '@' + summary
57
58
def __lldb_init_module(debugger, dict):
    """Register the summary providers of this module with ``debugger``.

    Two "type summary add" commands are issued: one binding
    CFString_SummaryProvider to NSString, CFStringRef and
    CFMutableStringRef, and one binding CFAttributedString_SummaryProvider
    to NSAttributedString.
    """
    commands = (
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef",
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString",
    )
    for command in commands:
        debugger.HandleCommand(command)
64
65
class CFStringSynthProvider:
    """Example synthetic children provider for CFString / NSString values.

    The provider reads the info bits of the object, derives a set of
    boolean flags from them and uses those flags to pick a strategy for
    locating the character data.  It exposes six children: the flags
    "mutable", "inline", "explicit", "unicode" and "special" (indices 0-4)
    and the string itself as "content" (index 5).

    Children other than "content" are for debugging only and must not be
    used in production code.

    Most methods instantiate a formatter Logger even when they log nothing
    themselves; those instantiations are kept untouched (presumably the
    Logger can trace its caller on construction - verify against
    lldb.formatters.Logger before removing them).
    """

    # flag children in index order; "content" always comes right after them
    _FLAG_CHILDREN = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        """Remember ``valobj`` and immediately compute the string flags."""
        logger = lldb.formatters.Logger.Logger()
        self.valobj = valobj
        self.update()

    def num_children(self):
        """Return 0 for an invalid object, otherwise 6 (5 flags + content)."""
        logger = lldb.formatters.Logger.Logger()
        if self.invalid:
            return 0
        return len(self._FLAG_CHILDREN) + 1

    @staticmethod
    def _native_str(text):
        """Return ``text`` as the interpreter's native ``str`` type.

        On Python 2 (where str is a byte string) unicode text is encoded as
        UTF-8; on Python 3 the text is returned unchanged.
        """
        if str is bytes and not isinstance(text, str):
            return text.encode('utf-8')
        return text

    @staticmethod
    def _c_string_literal(text):
        """Return ``text`` as a double-quoted C string literal.

        Backslashes, double quotes, newlines and carriage returns are
        escaped so the text cannot terminate the literal early.
        """
        escaped = text.replace('\\', '\\\\').replace('"', '\\"')
        escaped = escaped.replace('\n', '\\n').replace('\r', '\\r')
        return '"' + escaped + '"'

    @staticmethod
    def _decode_utf16(raw, little_endian):
        """Decode the UTF-16 code units in ``raw`` into text.

        ``raw`` is a bytearray (or bytes); ``little_endian`` selects the
        byte order.  Surrogate pairs are combined by the codec; malformed
        input is replaced rather than raising.
        """
        codec = 'utf-16-le' if little_endian else 'utf-16-be'
        return bytes(raw).decode(codec, 'replace')

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string from process memory.

        Args:
            pointer: address of the first 16-bit code unit.
            max_len: maximum number of code units (not bytes) to read.

        Returns:
            The decoded text (unicode on Python 2, str on Python 3).

        Raises:
            RuntimeError: if the process memory cannot be read.
        """
        logger = lldb.formatters.Logger.Logger()
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        remaining = max_len
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while remaining > 0:
            chunk = process.ReadMemory(pointer, 2, error)
            if error.Fail() or chunk is None or len(chunk) < 2:
                raise RuntimeError(
                    "cannot read UTF-16 data at address " + str(pointer))
            unit = bytearray(chunk)
            pointer = pointer + 2
            if unit[0] == 0 and unit[1] == 0:
                break
            raw.extend(unit)
            # read max_len unicode values, not max_len bytes
            remaining = remaining - 1
        # byte order follows the target's endianness
        # (do we really need this or is Cocoa going to
        #  use Windows-compatible little-endian even
        #  if the target is big endian?)
        return self._decode_utf16(raw, self.is_little)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Return the content of a "special" string as a char* SBValue.

        32-bit targets are delegated to handle_unicode_string_safe();
        otherwise UTF-16 data is read 12 bytes past the object address.
        """
        logger = lldb.formatters.Logger.Logger()
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        text = self._native_str(self.read_unicode(pointer))
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)" + self._c_string_literal(text))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Return the object description wrapped in a char* SBValue.

        Raises:
            TypeError: if the value has no object description.
        """
        description = self.valobj.GetObjectDescription()
        if description is None:
            raise TypeError("no object description available for the string")
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)" + self._c_string_literal(description))

    def handle_unicode_string(self):
        """Return the content of a (non-mutable) Unicode string.

        Returns the text as a native str, or an SBValue when one of the
        fallback paths is taken.
        """
        logger = lldb.formatters.Logger.Logger()
        # step 1: find offset
        base = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # a full pointer is skipped here before getting to the live
            # data
            pointer = base + self.pointer_size
        else:
            # the data lives elsewhere: read the pointer stored right after
            # the CFRuntimeBase and make an address out of it
            try:
                char_ptr_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                holder = self.valobj.CreateValueFromAddress(
                    "dummy", base, char_ptr_type)
                pointer = holder.GetValueAsUnsigned(0)
            except Exception:
                return self.valobj.CreateValueFromExpression(
                    "content",
                    "(char*)" + self._c_string_literal(
                        '@"invalid NSString"'))
        # step 2: read Unicode data at pointer
        # step 3: return it
        return self._native_str(self.read_unicode(pointer))

    def handle_inline_explicit(self):
        """Return a char* SBValue pointing 3 pointers past the object."""
        logger = lldb.formatters.Logger.Logger()
        address = 3 * self.pointer_size + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(address) + ")")

    def handle_mutable_string(self):
        """Return the content of a mutable string.

        The data pointer is read 2 pointers past the object start.  Unicode
        strings with the "explicit" flag are read and returned as a native
        str; otherwise a char* SBValue pointing one byte past the data
        pointer is returned.
        """
        logger = lldb.formatters.Logger.Logger()
        char_ptr_type = self.valobj.GetType().GetBasicType(
            lldb.eBasicTypeChar).GetPointerType()
        data = self.valobj.CreateChildAtOffset(
            "content", 2 * self.pointer_size, char_ptr_type)
        data_value = data.GetValueAsUnsigned(0)
        if self.explicit and self.unicode:
            return self._native_str(self.read_unicode(data_value))
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value + 1) + ")")

    def handle_UTF8_inline(self):
        """Return a char* SBValue for data stored inside the object.

        The data starts right after the CFRuntimeBase; one extra byte is
        skipped when the "explicit" flag is not set.
        """
        logger = lldb.formatters.Logger.Logger()
        address = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if not self.explicit:
            address = address + 1
        char_type = self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)
        return self.valobj.CreateValueFromAddress(
            "content", address, char_type).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return the char* stored right after the CFRuntimeBase."""
        logger = lldb.formatters.Logger.Logger()
        char_ptr_type = self.valobj.GetType().GetBasicType(
            lldb.eBasicTypeChar).GetPointerType()
        return self.valobj.CreateChildAtOffset(
            "content", self.size_of_cfruntime_base(), char_ptr_type)

    def get_child_at_index(self, index):
        """Return the child at ``index``.

        Indices 0-4 yield SBValues holding the flags as 0/1; index 5 yields
        the string content (an SBValue or a native str, depending on the
        reader that handles the flag combination).  None is returned for
        any other index and for an invalid object.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child [" + str(index) + "]"
        if self.invalid:
            # the flags were never computed, there is nothing to report
            return None
        for position, flag_name in enumerate(self._FLAG_CHILDREN):
            if index == position:
                return self.valobj.CreateValueFromExpression(
                    flag_name, str(int(getattr(self, flag_name))))
        if index != len(self._FLAG_CHILDREN):
            return None
        # we are handling the several possible combinations of flags.
        # for each known combination we have a function that knows how to
        # go fetch the data from memory instead of running code. if a string
        # is not correctly displayed, one should start by finding a
        # combination of flags that makes it different from these known
        # cases, and provide a new reader function. if this is not possible,
        # a new flag might have to be made up (like the "special" flag
        # below, which is not a real flag in CFString), or alternatively one
        # might need to use the ObjC runtime helper to detect the new class
        # and deal with it accordingly
        if self.mutable:
            return self.handle_mutable_string()
        if self.inline and self.explicit and \
                not self.unicode and not self.special:
            return self.handle_inline_explicit()
        if self.unicode:
            return self.handle_unicode_string()
        if self.special:
            return self.handle_special()
        if self.inline:
            return self.handle_UTF8_inline()
        return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called ``name``.

        "content" maps to the last child (num_children() - 1); unknown
        names yield None.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child ['" + str(name) + "']"
        if name == "content":
            return self.num_children() - 1
        for position, flag_name in enumerate(self._FLAG_CHILDREN):
            if name == flag_name:
                return position
        return None

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes used for the CFRuntimeBase header."""
        logger = lldb.formatters.Logger.Logger()
        padding = 4 if self.is_64_bit else 0
        return self.pointer_size + 4 + padding

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the offset of the info-bits byte inside the object."""
        logger = lldb.formatters.Logger.Logger()
        if self.is_little:
            return self.pointer_size
        return self.pointer_size + 3

    def read_info_bits(self):
        """Read the info-bits byte and update ``self.invalid``.

        Returns:
            The info bits as an int, or None (with ``self.invalid`` set to
            True) when the byte has no value.
        """
        logger = lldb.formatters.Logger.Logger()
        char_type = self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo", self.offset_of_info_bits(), char_type)
        # hex output so that int(..., 0) below can parse the value
        cfinfo.SetFormat(lldb.eFormatHex)
        info = cfinfo.GetValue()
        self.invalid = info is None
        if self.invalid:
            return None
        return int(info, 0)

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when both bits of the 0x60 mask are clear."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify
    # this instead of reading the info_bits
    def is_special_case(self):
        """Return True when no info bit is set at all."""
        logger = lldb.formatters.Logger.Logger()
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when the 0x10 info bit is set."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache pointer size, 64-bitness and byte order of the process."""
        logger = lldb.formatters.Logger.Logger()
        process = self.valobj.GetTarget().GetProcess()
        self.pointer_size = process.GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = process.GetByteOrder() == lldb.eByteOrderLittle

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the five flag attributes.

        When the info bits cannot be read the flags are left unset and
        ``self.invalid`` is True.
        """
        logger = lldb.formatters.Logger.Logger()
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the architecture info and the flags from the value."""
        logger = lldb.formatters.Logger.Logger()
        self.adjust_for_architecture()
        self.compute_flags()
352