# synthetic children and summary provider for CFString
# (and related NSString class)
try:
    import lldb
except ImportError:
    # Outside of LLDB's embedded interpreter the module is not available.
    # Keep this file importable so the pure flag-decoding helpers can still
    # be exercised; anything that talks to the debugger fails on first use.
    lldb = None

# text shown when no string could be extracted from the inspected object
_NO_VALID_STRING = 'no valid string here'


def _c_string_expression(text):
    """Return a `(char*)"..."` expression whose C string literal is `text`.

    Backslashes, double quotes and line breaks are escaped so that the
    literal stays well-formed whatever the target string contains.  Text
    that is not a native `str` (a Python 2 `unicode`) is UTF-8 encoded.
    """
    for plain, escaped in (('\\', '\\\\'), ('"', '\\"'),
                           ('\n', '\\n'), ('\r', '\\r')):
        text = text.replace(plain, escaped)
    if not isinstance(text, str):
        text = text.encode('utf-8')
    return '(char*)"' + text + '"'


def _content_summary(provider):
    """Return the summary of the provider's "content" child.

    Any failure while fetching the child, or a missing summary, yields the
    'no valid string here' placeholder instead of propagating.
    """
    try:
        content = provider.get_child_at_index(
            provider.get_child_index("content"))
        summary = content.GetSummary()
    except Exception:
        summary = None
    if summary is None:
        return _NO_VALID_STRING
    return summary


def CFString_SummaryProvider(valobj, dict):
    """Summary for NSString / CFStringRef / CFMutableStringRef values.

    Returns '@' followed by the summary of the string content, or an empty
    string when the info bits of the object cannot be read.
    """
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    return '@' + _content_summary(provider)


def CFAttributedString_SummaryProvider(valobj, dict):
    """Summary for NSAttributedString values.

    The backing string pointer is read one pointer-size past the address
    held by `valobj`; that string is then summarized like a plain CFString.
    Always returns '@' followed by either the string summary or the
    'no valid string here' placeholder.
    """
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = _NO_VALID_STRING
    if pointee is not None and pointee != 0:
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data", child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            summary = _content_summary(provider)
    return '@' + summary


def __lldb_init_module(debugger, dict):
    """Register the two summary providers with the debugger."""
    debugger.HandleCommand(
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef")
    debugger.HandleCommand(
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString")


class CFStringSynthProvider:
    """Synthetic children provider that decodes the layout of a CFString.

    The flag children ("mutable", "inline", "explicit", "unicode",
    "special") exist for debugging only; "content" is the child that
    carries the actual string data.
    """

    # names of the flag children, in child-index order; "content" follows
    _FLAG_CHILDREN = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        self.valobj = valobj
        self.update()

    # children other than "content" are for debugging only and must not be
    # used in production code
    def num_children(self):
        """Return 6 for a readable string, 0 when the info bits are invalid."""
        if self.invalid:
            return 0
        return 6

    def _char_type(self):
        """Return the target's basic `char` type."""
        return self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)

    def _char_pointer_type(self):
        """Return the target's `char*` type."""
        return self._char_type().GetPointerType()

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string starting at address `pointer`.

        At most `max_len` 16-bit code units are read, so that garbage or
        unterminated memory cannot keep the loop running indefinitely.
        Byte order follows `self.is_little`.  Undecodable sequences (for
        example an unpaired surrogate) are replaced with U+FFFD.

        NOTE(review): a failed memory read presumably makes ReadMemory
        return no data, in which case the bytearray conversion raises;
        the summary providers in this file treat any exception as "no
        valid string" -- confirm against the SBProcess documentation.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while max_len > 0:
            unit = bytearray(process.ReadMemory(pointer, 2, error))
            b0 = unit[0]
            b1 = unit[1]
            pointer = pointer + 2
            if b0 == 0 and b1 == 0:
                break
            raw.append(b0)
            raw.append(b1)
            # the limit counts code units, not bytes
            max_len = max_len - 1
        # decode according to the target's endianness
        # (do we really need this or is Cocoa going to
        #  use Windows-compatible little-endian even
        #  if the target is big endian?)
        if self.is_little:
            codec = 'utf-16-le'
        else:
            codec = 'utf-16-be'
        return raw.decode(codec, 'replace')

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Read a "special" string (info bits all zero)."""
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        pystr = self.read_unicode(pointer)
        return self.valobj.CreateValueFromExpression(
            "content", _c_string_expression(pystr))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Build the content child from the object's description.

        Raises TypeError when no description is available, as the original
        string concatenation did.
        """
        description = self.valobj.GetObjectDescription()
        if description is None:
            raise TypeError("no object description available")
        return self.valobj.CreateValueFromExpression(
            "content", _c_string_expression(description))

    def handle_unicode_string(self):
        """Read a string whose unicode flag is set."""
        # step 1: find the address of the UTF-16 data
        pointer = self.valobj.GetValueAsUnsigned(0) + \
            self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # not sure why 8 bytes are skipped here
            # (lldb) mem read -c 50 0x00000001001154f0
            # 0x1001154f0: 98 1a 85 71 ff 7f 00 00 90 07 00 00 01 00 00 00  ...q?...........
            # 0x100115500: 03 00 00 00 00 00 00 00 *c3 03 78 00 78 00 00 00  ........?.x.x...
            # 0x100115510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
            # 0x100115520: 00 00                                            ..
            # content begins at * (i.e. 8 bytes into variants, skipping
            # void* buffer in __notInlineImmutable1 entirely, while the
            # length byte is correctly located for an inline string)
            pointer = pointer + 8
        else:
            # read a pointer-sized value here and make an address out of it.
            # `pointer` is an absolute address, not an offset into valobj,
            # so the value has to be created from that address.
            try:
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, self._char_pointer_type())
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                return self.valobj.CreateValueFromExpression(
                    "content", _c_string_expression('@"invalid NSString"'))
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return self.valobj.CreateValueFromExpression(
            "content", _c_string_expression(pystr))

    def handle_inline_explicit(self):
        """Point at inline 8-bit data: 24 bytes into the object on 64-bit
        targets, 12 bytes otherwise."""
        if self.is_64_bit:
            offset = 24
        else:
            offset = 12
        offset = offset + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(offset) + ")")

    def handle_mutable_string(self):
        """Point at the data of a mutable string.

        The buffer pointer is read at offset 16 (64-bit) or 8 (32-bit) and
        the first byte of the buffer is skipped.
        """
        if self.is_64_bit:
            offset = 16
        else:
            offset = 8
        data = self.valobj.CreateChildAtOffset(
            "content", offset, self._char_pointer_type())
        data_value = data.GetValueAsUnsigned(0)
        data_value = data_value + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    def handle_UTF8_inline(self):
        """Point at 8-bit data stored right after the CFRuntimeBase header."""
        offset = self.valobj.GetValueAsUnsigned(0) + \
            self.size_of_cfruntime_base()
        if not self.explicit:
            # skip the length byte that precedes the data
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content", offset, self._char_type()).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return the char* stored right after the CFRuntimeBase header."""
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content", offset, self._char_pointer_type())

    def get_child_at_index(self, index):
        """Return the synthetic child at `index`.

        Indexes 0-4 expose the decoded flags as 0/1 values, index 5 is the
        string content.  Any other index yields None.
        """
        for position, name in enumerate(self._FLAG_CHILDREN):
            if index == position:
                return self.valobj.CreateValueFromExpression(
                    name, str(int(getattr(self, name))))
        if index != 5:
            return None
        # we are handling the several possible combinations of flags.
        # for each known combination we have a function that knows how to
        # go fetch the data from memory instead of running code. if a string
        # is not correctly displayed, one should start by finding a
        # combination of flags that makes it different from these known
        # cases, and provide a new reader function. if this is not possible,
        # a new flag might have to be made up (like the "special" flag
        # below, which is not a real flag in CFString), or alternatively one
        # might need to use the ObjC runtime helper to detect the new class
        # and deal with it accordingly
        if self.mutable:
            return self.handle_mutable_string()
        if self.inline and self.explicit and \
                not self.unicode and not self.special:
            return self.handle_inline_explicit()
        if self.unicode:
            return self.handle_unicode_string()
        if self.special:
            return self.handle_special()
        if self.inline:
            return self.handle_UTF8_inline()
        return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called `name` (None if unknown)."""
        if name == "content":
            return self.num_children() - 1
        for position, flag in enumerate(self._FLAG_CHILDREN):
            if name == flag:
                return position
        return None

    def is_64bit(self):
        """True when the target process uses 8-byte addresses."""
        return self.valobj.GetTarget().GetProcess().GetAddressByteSize() == 8

    def is_little_endian(self):
        """True when the target process is little-endian."""
        return self.valobj.GetTarget().GetProcess().GetByteOrder() == \
            lldb.eByteOrderLittle

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size of CFRuntimeBase: 16 on 64-bit, 8 on 32-bit."""
        if self.is_64_bit:
            return 8 + 4 + 4
        return 4 + 4

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the offset of the info-bits byte inside the object."""
        if self.is_64_bit:
            offset = 8
        else:
            offset = 4
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info-bits byte and record whether it was readable.

        Sets `self.invalid` and returns the byte as an int, or None when
        the value could not be read.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo", self.offset_of_info_bits(), self._char_type())
        # NOTE(review): 11 is presumably lldb.eFormatHex (the value is
        # parsed below with base auto-detection) -- confirm
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is None:
            self.invalid = True
            return None
        self.invalid = False
        return int(info, 0)

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        return (self.info_bits & 1) == 1

    def is_inline(self):
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify
    # this instead of reading the info_bits
    def is_special_case(self):
        return self.info_bits == 0

    def is_unicode(self):
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        self.is_64_bit = self.is_64bit()
        self.is_little = self.is_little_endian()

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Decode the info bits into flags; no flags are set when the bits
        could not be read (`self.invalid` is True in that case)."""
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh architecture info and flags from the current value."""
        self.adjust_for_architecture()
        self.compute_flags()