# synthetic children and summary provider for CFString
# (and related NSString class)
import lldb
import objc_runtime

# text shown (after the leading '@') when no string could be extracted
_NO_STRING = 'no valid string here'


# Build the expression text `(char*)"..."` for the given string.
# Backslashes, double quotes and line breaks are escaped so that the text
# stays a single well-formed C string literal when handed to the
# expression evaluator. A Python 2 `unicode` value is UTF-8 encoded first.
# Raises TypeError when text is None, which is what concatenating None
# into the expression would do.
def _char_ptr_expression(text):
    if text is None:
        raise TypeError("cannot build a C string expression from None")
    if not isinstance(text, str):
        text = text.encode('utf-8')
    escaped = (text.replace('\\', '\\\\')
                   .replace('"', '\\"')
                   .replace('\n', '\\n')
                   .replace('\r', '\\r'))
    return '(char*)"' + escaped + '"'


# Return the summary of the provider's "content" child, or None when the
# child cannot be produced or has no summary.
def _content_summary(provider):
    try:
        index = provider.get_child_index("content")
        return provider.get_child_at_index(index).GetSummary()
    except Exception:
        return None


# Summary for NSString / CFStringRef / CFMutableStringRef values.
# Returns '' when the info bits of the object cannot be read, otherwise
# '@' followed by the summary of the "content" child (or a placeholder
# text when no summary is available).
def CFString_SummaryProvider(valobj, dict):
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    summary = _content_summary(provider)
    if summary is None:
        summary = _NO_STRING
    return '@' + summary


# Summary for NSAttributedString values.
# The value found one pointer-size past the start of the object is treated
# as the address of the backing string, which is then summarized the same
# way CFString_SummaryProvider does. Always returns '@' + some text.
def CFAttributedString_SummaryProvider(valobj, dict):
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = _NO_STRING
    if pointee:
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data", child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            summary = _content_summary(provider)
            if summary is None:
                summary = _NO_STRING
    return '@' + summary


# Registers the two summary providers with the debugger.
def __lldb_init_module(debugger, dict):
    debugger.HandleCommand("type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef")
    debugger.HandleCommand("type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString")


# Synthetic children provider for CFString objects.
# Exposes five flag children (indexes 0-4) plus a "content" child
# (index 5) holding the string data.
class CFStringSynthProvider:
    # names of the flag children, in child-index order; each name is also
    # the name of the instance attribute holding the flag
    _FLAG_CHILDREN = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        self.valobj = valobj
        self.update()

    # children other than "content" are for debugging only and must not
    # be used in production code
    def num_children(self):
        if self.invalid:
            return 0
        return 6

    # Read a zero-terminated sequence of 16-bit code units starting at
    # `pointer` in the inferior and return it as a text string.
    # Reading stops at the terminator or at the first failed memory read;
    # whatever was read up to that point is returned.
    def read_unicode(self, pointer):
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while True:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or content is None or len(content) < 2:
                # unreadable memory: stop instead of crashing on the result
                break
            unit = bytearray(content)
            pointer = pointer + 2
            if unit[0] == 0 and unit[1] == 0:
                break
            raw += unit
        # interpret the bytes according to the target's endianness
        # (do we really need this or is Cocoa going to
        # use Windows-compatible little-endian even
        # if the target is big endian?)
        # decoding the whole buffer as UTF-16 also combines surrogate pairs
        codec = 'utf-16-le' if self.is_little else 'utf-16-be'
        return raw.decode(codec, 'replace')

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        pystr = self.read_unicode(pointer)
        return self.valobj.CreateValueFromExpression(
            "content", _char_ptr_expression(pystr))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        return self.valobj.CreateValueFromExpression(
            "content",
            _char_ptr_expression(self.valobj.GetObjectDescription()))

    # Locate the 16-bit character data of a unicode string, read it and
    # return it as the "content" child.
    def handle_unicode_string(self):
        # step 1: find offset
        pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # a full pointer is skipped here before getting to the live data
            pointer = pointer + self.pointer_size
        else:
            # read a pointer stored right after the CFRuntimeBase and make
            # an address out of it; `pointer` is an absolute address here,
            # so the value has to be created from that address rather than
            # as a child at an offset from the object
            try:
                char_ptr_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, char_ptr_type)
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                return self.valobj.CreateValueFromExpression(
                    "content", _char_ptr_expression('@"invalid NSString"'))
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return self.valobj.CreateValueFromExpression(
            "content", _char_ptr_expression(pystr))

    # the character data sits three pointer-sizes past the object address
    def handle_inline_explicit(self):
        offset = 3 * self.pointer_size
        offset = offset + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(offset) + ")")

    # a data pointer is read two pointer-sizes into the object; the
    # characters are taken to start one byte past the address it holds
    def handle_mutable_string(self):
        offset = 2 * self.pointer_size
        data = self.valobj.CreateChildAtOffset(
            "content", offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType())
        data_value = data.GetValueAsUnsigned(0)
        data_value = data_value + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    # the characters follow the CFRuntimeBase directly; when the
    # "explicit" flag is False one extra byte is skipped first
    def handle_UTF8_inline(self):
        offset = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if not self.explicit:
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content", offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)).AddressOf()

    # a char* is read from the slot right after the CFRuntimeBase
    def handle_UTF8_not_inline(self):
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content", offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType())

    # Return the child at `index`: 0-4 are the flags (as 0/1 values),
    # 5 is the string content. Any other index yields None.
    def get_child_at_index(self, index):
        if index in (0, 1, 2, 3, 4):
            name = self._FLAG_CHILDREN[index]
            return self.valobj.CreateValueFromExpression(
                name, str(int(getattr(self, name))))
        if index == 5:
            # we are handling the several possible combinations of flags.
            # for each known combination we have a function that knows how to
            # go fetch the data from memory instead of running code. if a string is not
            # correctly displayed, one should start by finding a combination of flags that
            # makes it different from these known cases, and provide a new reader function
            # if this is not possible, a new flag might have to be made up (like the "special" flag
            # below, which is not a real flag in CFString), or alternatively one might need to use
            # the ObjC runtime helper to detect the new class and deal with it accordingly
            if self.mutable:
                return self.handle_mutable_string()
            elif self.inline and self.explicit and \
                    not self.unicode and not self.special:
                return self.handle_inline_explicit()
            elif self.unicode:
                return self.handle_unicode_string()
            elif self.special:
                return self.handle_special()
            elif self.inline:
                return self.handle_UTF8_inline()
            else:
                return self.handle_UTF8_not_inline()
        return None

    # Map a child name to its index; "content" is always the last child.
    # Unknown names yield None.
    def get_child_index(self, name):
        if name == "content":
            return self.num_children() - 1
        if name in self._FLAG_CHILDREN:
            return self._FLAG_CHILDREN.index(name)
        return None

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        return self.pointer_size + 4 + (4 if self.is_64_bit else 0)

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        offset = self.pointer_size
        if not self.is_little:
            offset = offset + 3
        return offset

    # Read the info byte of the object. Sets self.invalid and returns the
    # byte as an int, or None when the value could not be read.
    def read_info_bits(self):
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar))
        # NOTE(review): 11 is presumably lldb.eFormatHex - confirm; the
        # value text is parsed below with int(info, 0)
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is not None:
            self.invalid = False
            return int(info, 0)
        self.invalid = True
        return None

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        return (self.info_bits & 1) == 1

    def is_inline(self):
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        return self.info_bits == 0

    def is_unicode(self):
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        process = self.valobj.GetTarget().GetProcess()
        self.pointer_size = process.GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = process.GetByteOrder() == lldb.eByteOrderLittle

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            # keep the flag attributes defined even for an invalid object
            self.mutable = self.inline = self.explicit = False
            self.unicode = self.special = False
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    # Refresh the cached architecture info and flags from the value.
    def update(self):
        self.adjust_for_architecture()
        self.compute_flags()