# synthetic children and summary provider for CFString
# (and related NSString class)
import lldb
import objc_runtime


def CFString_SummaryProvider(valobj, dict):
    """Return the LLDB summary for an NSString / CFStringRef value.

    The summary is '@' followed by the summary of the synthetic "content"
    child. If that child cannot be produced or has no summary, the result
    is '@no valid string here'. If the info bits of the object cannot be
    read at all (provider marked invalid), the empty string is returned.
    """
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    try:
        content = provider.get_child_at_index(
            provider.get_child_index("content"))
        summary = content.GetSummary()
    except Exception:
        # best effort: any failure while digging into the object is
        # reported as "no valid string" rather than propagated to LLDB
        summary = None
    if summary is None:
        summary = 'no valid string here'
    return '@' + summary


def CFAttributedString_SummaryProvider(valobj, dict):
    """Return the LLDB summary for an NSAttributedString value.

    Reads the pointer stored one pointer-size past the start of the object,
    treats it as a string object and summarizes it with
    CFStringSynthProvider. Always returns '@' followed by either the string
    summary or 'no valid string here'.
    """
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = 'no valid string here'
    if pointee is not None and pointee != 0:
        # the backing string pointer is read from just after the first
        # pointer-sized field of the object
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data",
            child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                content = provider.get_child_at_index(
                    provider.get_child_index("content"))
                summary = content.GetSummary()
            except Exception:
                summary = 'no valid string here'
    if summary is None:
        summary = 'no valid string here'
    return '@' + summary


def __lldb_init_module(debugger, dict):
    """Register the summary providers of this module with LLDB."""
    debugger.HandleCommand("type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef")
    debugger.HandleCommand("type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString")


class CFStringSynthProvider:
    """Synthetic children provider for CFString / NSString objects.

    Exposes six children: the flags "mutable", "inline", "explicit",
    "unicode" and "special" (indices 0-4) and the string data itself as
    "content" (index 5). After construction, `invalid` tells whether the
    info bits of the object could be read.
    """

    def __init__(self, valobj, dict):
        self.valobj = valobj
        self.update()

    # children other than "content" are for debugging only and must not
    # be used in production code
    def num_children(self):
        """Return 6, or 0 when the info bits could not be read."""
        if self.invalid:
            return 0
        return 6

    def _content_from_text(self, text):
        """Create the "content" child from a piece of text.

        The text is embedded in a C string literal that is handed to the
        expression evaluator, so backslashes, double quotes and line
        breaks are escaped first; unescaped they would terminate or
        corrupt the literal.
        """
        escaped = (text.replace('\\', '\\\\')
                       .replace('"', '\\"')
                       .replace('\n', '\\n')
                       .replace('\r', '\\r'))
        expression = '(char*)"' + escaped + '"'
        if not isinstance(expression, str):
            # Python 2 unicode object: hand UTF-8 encoded bytes to LLDB.
            # On Python 3 the expression already is a str and is passed
            # through unchanged.
            expression = expression.encode('utf-8')
        return self.valobj.CreateValueFromExpression("content", expression)

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated string of 16-bit units from the target.

        pointer -- address in the target process where the data starts
        max_len -- maximum number of 16-bit units to read; bounds the
                   loop when no terminator is found in memory

        Returns the decoded text as a unicode string, without the
        terminator.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        try:
            to_char = unichr  # Python 2
        except NameError:
            to_char = chr  # Python 3
        chars = []
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while max_len > 0:
            # NOTE(review): `error` is never inspected; presumably a failed
            # read makes bytearray()/indexing below raise - confirm
            content = process.ReadMemory(pointer, 2, error)
            new_bytes = bytearray(content)
            b0 = new_bytes[0]
            b1 = new_bytes[1]
            pointer = pointer + 2
            if b0 == 0 and b1 == 0:
                break
            # rearrange bytes depending on endianness
            # (do we really need this or is Cocoa going to
            # use Windows-compatible little-endian even
            # if the target is big endian?)
            if self.is_little:
                value = b1 * 256 + b0
            else:
                value = b0 * 256 + b1
            chars.append(to_char(value))
            # count 16-bit units read, not bytes
            max_len = max_len - 1
        return u''.join(chars)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Return "content" for the "special" case (info bits == 0)."""
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        # 8-byte pointer + 4 info bytes: the data starts in what is the
        # padding word of CFRuntimeBase on LP64 (see is_special_case)
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        pystr = self.read_unicode(pointer)
        return self._content_from_text(pystr)

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Return "content" built from the object description of the value.

        Raises if the value has no object description (GetObjectDescription
        returning None); the summary providers treat that as
        "no valid string here".
        """
        return self._content_from_text(self.valobj.GetObjectDescription())

    def handle_unicode_string(self):
        """Return "content" for strings whose unicode flag is set."""
        # step 1: find offset
        if self.inline:
            pointer = (self.valobj.GetValueAsUnsigned(0) +
                       self.size_of_cfruntime_base())
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            else:
                # not sure why 8 bytes are skipped here
                # (lldb) mem read -c 50 0x00000001001154f0
                # 0x1001154f0: 98 1a 85 71 ff 7f 00 00 90 07 00 00 01 00 00 00 ...q?...........
                # 0x100115500: 03 00 00 00 00 00 00 00 *c3 03 78 00 78 00 00 00 ........?.x.x...
                # 0x100115510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
                # 0x100115520: 00 00 ..
                # content begins at * (i.e. 8 bytes into variants, skipping void* buffer in
                # __notInlineImmutable1 entirely, while the length byte is correctly located
                # for an inline string)
                # on NMOS in 32 bit mode, we need to skip 4 bytes instead of 8
                # if the same occurs on Lion, then this simply needs to be pointer + pointer_size
                #
                # the result of the external helper is compared with == False
                # on purpose: its exact return type is not visible from here
                if (not self.is_64_bit and
                        objc_runtime.Utilities.check_is_osx_lion(self.valobj.GetTarget()) == False):
                    pointer = pointer + 4
                else:
                    pointer = pointer + 8
        else:
            pointer = (self.valobj.GetValueAsUnsigned(0) +
                       self.size_of_cfruntime_base())
            # read 8 bytes here and make an address out of them
            # NOTE(review): an absolute address is passed as the offset
            # here, while handle_UTF8_not_inline passes only
            # size_of_cfruntime_base() - confirm which one is intended
            try:
                vopointer = self.valobj.CreateChildAtOffset(
                    "dummy",
                    pointer,
                    self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType())
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                # evaluates (char*)"@\"invalid NSString\"" in the target
                return self._content_from_text('@"invalid NSString"')
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return self._content_from_text(pystr)

    def handle_inline_explicit(self):
        """Return "content" for inline, explicit-length, non-unicode strings.

        The data is taken to start at a fixed offset from the object
        address: 24 bytes on 64-bit targets, 12 bytes otherwise.
        """
        if self.is_64_bit:
            offset = 24
        else:
            offset = 12
        offset = offset + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(offset) + ")")

    def handle_mutable_string(self):
        """Return "content" for mutable strings.

        Reads a data pointer stored 16 bytes (64-bit) or 8 bytes (32-bit)
        into the object and points one byte past it.
        """
        if self.is_64_bit:
            offset = 16
        else:
            offset = 8
        data = self.valobj.CreateChildAtOffset(
            "content",
            offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType())
        data_value = data.GetValueAsUnsigned(0)
        # presumably skips a leading length byte - TODO confirm
        data_value = data_value + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    def handle_UTF8_inline(self):
        """Return "content" for inline non-unicode strings."""
        offset = (self.valobj.GetValueAsUnsigned(0) +
                  self.size_of_cfruntime_base())
        if not self.explicit:
            # a length byte precedes the data (see has_explicit_length)
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content",
            offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return "content" for non-inline, non-unicode strings.

        The child is the char pointer stored right after CFRuntimeBase.
        """
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content",
            offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType())

    def get_child_at_index(self, index):
        """Return the synthetic child at `index`.

        Indices 0-4 are the flag children, 5 is "content". Any other index
        yields None.
        """
        if index == 0:
            return self.valobj.CreateValueFromExpression(
                "mutable", str(int(self.mutable)))
        if index == 1:
            return self.valobj.CreateValueFromExpression(
                "inline", str(int(self.inline)))
        if index == 2:
            return self.valobj.CreateValueFromExpression(
                "explicit", str(int(self.explicit)))
        if index == 3:
            return self.valobj.CreateValueFromExpression(
                "unicode", str(int(self.unicode)))
        if index == 4:
            return self.valobj.CreateValueFromExpression(
                "special", str(int(self.special)))
        if index == 5:
            # we are handling the several possible combinations of flags.
            # for each known combination we have a function that knows how to
            # go fetch the data from memory instead of running code. if a string is not
            # correctly displayed, one should start by finding a combination of flags that
            # makes it different from these known cases, and provide a new reader function
            # if this is not possible, a new flag might have to be made up (like the "special" flag
            # below, which is not a real flag in CFString), or alternatively one might need to use
            # the ObjC runtime helper to detect the new class and deal with it accordingly
            if self.mutable:
                return self.handle_mutable_string()
            elif (self.inline and self.explicit and
                    not self.unicode and not self.special):
                return self.handle_inline_explicit()
            elif self.unicode:
                return self.handle_unicode_string()
            elif self.special:
                return self.handle_special()
            elif self.inline:
                return self.handle_UTF8_inline()
            else:
                return self.handle_UTF8_not_inline()
        return None

    def get_child_index(self, name):
        """Return the index of the child called `name`.

        "content" is always the last child (num_children() - 1). Unknown
        names yield None.
        """
        if name == "content":
            return self.num_children() - 1
        if name == "mutable":
            return 0
        if name == "inline":
            return 1
        if name == "explicit":
            return 2
        if name == "unicode":
            return 3
        if name == "special":
            return 4
        return None

    def is_64bit(self):
        """Return True when the process uses 8-byte addresses."""
        return self.valobj.GetTarget().GetProcess().GetAddressByteSize() == 8

    def is_little_endian(self):
        """Return True when the process byte order is little endian."""
        return (self.valobj.GetTarget().GetProcess().GetByteOrder() ==
                lldb.eByteOrderLittle)

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size of CFRuntimeBase: 16 on 64-bit, 8 on 32-bit."""
        if self.is_64_bit:
            return 8 + 4 + 4
        else:
            return 4 + 4

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits inside the object."""
        if self.is_64_bit:
            offset = 8
        else:
            offset = 4
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info bits byte and update `self.invalid`.

        Returns the byte as an int, or None (with `invalid` set to True)
        when the value could not be read.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar))
        # NOTE(review): 11 is presumably lldb.eFormatHex (the value is
        # parsed with int(info, 0) below) - confirm
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is not None:
            self.invalid = False
            return int(info, 0)
        else:
            self.invalid = True
            return None

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when neither of the 0x60 bits is set."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        """Return True when the info bits are all zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when the 0x10 bit of the info bits is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache pointer width and byte order of the target process."""
        self.is_64_bit = self.is_64bit()
        self.is_little = self.is_little_endian()

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the flag attributes from them.

        When the info bits cannot be read the flag attributes are left
        unset and only `invalid` is updated (by read_info_bits).
        """
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and flags."""
        self.adjust_for_architecture()
        self.compute_flags()