"""
LLDB AppKit formatters

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""
# example synthetic children and summary provider for CFString (and related NSString class)
# the real code is part of the LLDB core
import lldb
import lldb.runtime.objc.objc_runtime
import lldb.formatters.Logger

try:
    unichr
except NameError:
    # Python 3 has no unichr(); chr() covers the same range there.
    unichr = chr


def _native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 (where ``str`` is ``bytes``) the decoded text is UTF-8
    encoded so it can be concatenated with ordinary string literals.  On
    Python 3 it is returned unchanged: encoding there would produce
    ``bytes``, which cannot be concatenated with ``str`` and used to make
    every string-building code path below raise ``TypeError``.
    """
    if str is bytes:
        return text.encode("utf-8")
    return text


def _content_summary(provider):
    """Return the display text of the provider's "content" child.

    The "content" child is either an ``lldb.SBValue`` (whose summary is
    returned, possibly ``None``) or a plain Python string produced by one of
    the memory readers (which is wrapped in double quotes).
    """
    content = provider.get_child_at_index(provider.get_child_index("content"))
    if isinstance(content, lldb.SBValue):
        return content.GetSummary()
    return '"' + content + '"'


def CFString_SummaryProvider(valobj, dict):
    """Summary provider for NSString / CFStringRef / CFMutableStringRef.

    Returns the string contents prefixed with "@", the text
    "@<variable is not NSString>" when the contents cannot be obtained, or
    an empty string when the info bits of the object cannot be read.
    """
    logger = lldb.formatters.Logger.Logger()
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ""
    try:
        summary = _content_summary(provider)
    except Exception:
        # best effort: any failure while reading is reported as "not a string"
        summary = None
    if summary is None:
        summary = "<variable is not NSString>"
    return "@" + summary


def CFAttributedString_SummaryProvider(valobj, dict):
    """Summary provider for NSAttributedString.

    Reads the pointer stored one pointer-sized word past the object address,
    treats it as a string object and formats it through
    CFStringSynthProvider.  Returns the contents prefixed with "@", or
    "@<variable is not NSAttributedString>" when they cannot be obtained.
    """
    logger = lldb.formatters.Logger.Logger()
    fallback = "<variable is not NSAttributedString>"
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = fallback
    if pointee is not None and pointee != 0:
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType()
        )
        child = child_ptr.CreateValueFromAddress(
            "string_data", child_ptr.GetValueAsUnsigned(), valobj.GetType()
        ).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                # the content child may be a plain string rather than an
                # SBValue, so go through the same helper as the NSString
                # provider instead of calling GetSummary() unconditionally
                summary = _content_summary(provider)
            except Exception:
                summary = fallback
    if summary is None:
        summary = fallback
    return "@" + summary


def __lldb_init_module(debugger, dict):
    """Register both summary providers when the module is imported into LLDB."""
    debugger.HandleCommand(
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef"
    )
    debugger.HandleCommand(
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString"
    )


class CFStringSynthProvider:
    """Synthetic children provider that decodes a CFString from memory.

    Exposes six children: the flags "mutable", "inline", "explicit",
    "unicode" and "special" (indices 0-4) and the string "content"
    (index 5).  After construction, ``invalid`` tells whether the info bits
    of the object could be read; the flag attributes are only set when they
    could.
    """

    def __init__(self, valobj, dict):
        logger = lldb.formatters.Logger.Logger()
        self.valobj = valobj
        self.update()

    # children other than "content" are for debugging only and must not be
    # used in production code
    def num_children(self):
        """Return 6, or 0 when the object could not be read."""
        logger = lldb.formatters.Logger.Logger()
        if self.invalid:
            return 0
        return 6

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated run of 16-bit code units starting at *pointer*.

        At most *max_len* code units are read.  Reading stops early at a
        zero code unit or when the process memory cannot be read, in which
        case whatever was decoded so far is returned.
        """
        logger = lldb.formatters.Logger.Logger()
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        chars = []
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while max_len > 0:
            content = process.ReadMemory(pointer, 2, error)
            # a failed read yields no usable buffer; stop instead of letting
            # bytearray()/indexing raise on it
            if error.Fail() or content is None or len(content) < 2:
                break
            new_bytes = bytearray(content)
            b0 = new_bytes[0]
            b1 = new_bytes[1]
            pointer = pointer + 2
            if b0 == 0 and b1 == 0:
                break
            # rearrange bytes depending on endianness
            # (do we really need this or is Cocoa going to
            # use Windows-compatible little-endian even
            # if the target is big endian?)
            if self.is_little:
                value = b1 * 256 + b0
            else:
                value = b0 * 256 + b1
            chars.append(unichr(value))
            # read max_len unicode values, not max_len bytes
            max_len = max_len - 1
        return "".join(chars)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Read a "special" string (info bits == 0) and return it as an SBValue."""
        logger = lldb.formatters.Logger.Logger()
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        pystr = self.read_unicode(pointer)
        return self.valobj.CreateValueFromExpression(
            "content", '(char*)"' + _native_str(pystr) + '"'
        )

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Build the content value from the object's description."""
        return self.valobj.CreateValueFromExpression(
            "content", '(char*)"' + self.valobj.GetObjectDescription() + '"'
        )

    def handle_unicode_string(self):
        """Return the contents of a string whose "unicode" flag is set.

        Returns a native Python string read from memory, or an SBValue when
        the safe (object description) path or the error path is taken.
        """
        logger = lldb.formatters.Logger.Logger()
        # step 1: find offset
        if self.inline:
            pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            else:
                # a full pointer is skipped here before getting to the live
                # data
                pointer = pointer + self.pointer_size
        else:
            pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
            # read 8 bytes here and make an address out of them
            try:
                char_type = (
                    self.valobj.GetType()
                    .GetBasicType(lldb.eBasicTypeChar)
                    .GetPointerType()
                )
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, char_type
                )
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                # NOTE(review): expression text kept verbatim from the
                # original; its quoting looks unusual - confirm intent
                return self.valobj.CreateValueFromExpression(
                    "content", '(char*)"@"invalid NSString""'
                )
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return _native_str(pystr)

    def handle_inline_explicit(self):
        """Return a char* SBValue pointing three pointer-sized words into the object."""
        logger = lldb.formatters.Logger.Logger()
        offset = 3 * self.pointer_size
        offset = offset + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(offset) + ")"
        )

    def handle_mutable_string(self):
        """Return the contents of a mutable string.

        The data pointer is read two pointer-sized words into the object.
        Explicit-length Unicode data is decoded into a native Python string;
        otherwise a char* SBValue pointing one byte past the data pointer is
        returned.
        """
        logger = lldb.formatters.Logger.Logger()
        offset = 2 * self.pointer_size
        data = self.valobj.CreateChildAtOffset(
            "content",
            offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType(),
        )
        data_value = data.GetValueAsUnsigned(0)
        if self.explicit and self.unicode:
            return _native_str(self.read_unicode(data_value))
        else:
            data_value = data_value + 1
            return self.valobj.CreateValueFromExpression(
                "content", "(char*)(" + str(data_value) + ")"
            )

    def handle_UTF8_inline(self):
        """Return the address of the character data stored inside the object."""
        logger = lldb.formatters.Logger.Logger()
        offset = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if not self.explicit:
            # one extra byte is skipped when the explicit flag is False
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content", offset, self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)
        ).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return the char* child located right after the CFRuntimeBase header."""
        logger = lldb.formatters.Logger.Logger()
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content",
            offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType(),
        )

    def get_child_at_index(self, index):
        """Return the child at *index*.

        Indices 0-4 are the flag values as SBValues; index 5 is the string
        content, which is either an SBValue or a native Python string
        depending on the reader used.  Any other index yields None.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child [" + str(index) + "]"
        if index == 0:
            return self.valobj.CreateValueFromExpression(
                "mutable", str(int(self.mutable))
            )
        if index == 1:
            return self.valobj.CreateValueFromExpression(
                "inline", str(int(self.inline))
            )
        if index == 2:
            return self.valobj.CreateValueFromExpression(
                "explicit", str(int(self.explicit))
            )
        if index == 3:
            return self.valobj.CreateValueFromExpression(
                "unicode", str(int(self.unicode))
            )
        if index == 4:
            return self.valobj.CreateValueFromExpression(
                "special", str(int(self.special))
            )
        if index == 5:
            # we are handling the several possible combinations of flags.
            # for each known combination we have a function that knows how to
            # go fetch the data from memory instead of running code. if a string is not
            # correctly displayed, one should start by finding a combination of flags that
            # makes it different from these known cases, and provide a new reader function
            # if this is not possible, a new flag might have to be made up (like the "special" flag
            # below, which is not a real flag in CFString), or alternatively one might need to use
            # the ObjC runtime helper to detect the new class and deal with it accordingly
            if self.mutable:
                return self.handle_mutable_string()
            elif (
                self.inline
                and self.explicit
                and not self.unicode
                and not self.special
                and not self.mutable
            ):
                return self.handle_inline_explicit()
            elif self.unicode:
                return self.handle_unicode_string()
            elif self.special:
                return self.handle_special()
            elif self.inline:
                return self.handle_UTF8_inline()
            else:
                return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called *name* (None if unknown)."""
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child ['" + str(name) + "']"
        if name == "content":
            return self.num_children() - 1
        if name == "mutable":
            return 0
        if name == "inline":
            return 1
        if name == "explicit":
            return 2
        if name == "unicode":
            return 3
        if name == "special":
            return 4

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes used for the CFRuntimeBase header."""
        logger = lldb.formatters.Logger.Logger()
        return self.pointer_size + 4 + (4 if self.is_64_bit else 0)

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits within the object."""
        logger = lldb.formatters.Logger.Logger()
        offset = self.pointer_size
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info-bits byte, updating ``self.invalid``.

        Returns the byte as an int, or None (with ``invalid`` set to True)
        when the value cannot be obtained.
        """
        logger = lldb.formatters.Logger.Logger()
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar),
        )
        # presumably 11 is lldb.eFormatHex - TODO confirm; the textual value
        # is parsed below with automatic base detection
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is not None:
            self.invalid = False
            return int(info, 0)
        else:
            self.invalid = True
            return None

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """True when bit 0 of the info bits is set."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """True when neither of the 0x60 bits is set."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """False only when bit 2 is set and bit 0 is clear."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        """True when the info bits are all zero."""
        logger = lldb.formatters.Logger.Logger()
        return self.info_bits == 0

    def is_unicode(self):
        """True when the 0x10 bit of the info bits is set."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache the pointer size, 64-bit-ness and byte order of the process."""
        logger = lldb.formatters.Logger.Logger()
        self.pointer_size = self.valobj.GetTarget().GetProcess().GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = (
            self.valobj.GetTarget().GetProcess().GetByteOrder() == lldb.eByteOrderLittle
        )

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the flag attributes from them.

        When the info bits cannot be read the flag attributes are left
        unset and only ``invalid`` (set by read_info_bits) is meaningful.
        """
        logger = lldb.formatters.Logger.Logger()
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and flags from the target."""
        logger = lldb.formatters.Logger.Logger()
        self.adjust_for_architecture()
        self.compute_flags()