xref: /llvm-project/lldb/examples/summaries/cocoa/CFString.py (revision 7bc0ec3aad663a2c81fddf9da38dba46bba6be19)
1# synthetic children and summary provider for CFString
2# (and related NSString class)
3import lldb
4
def CFString_SummaryProvider(valobj, dict):
    """Return the summary string for an NSString/CFString value.

    Builds a CFStringSynthProvider for ``valobj`` and asks its "content"
    child for a summary.

    valobj -- the value to summarize
    dict   -- passed through to CFStringSynthProvider (name kept for
              interface compatibility even though it shadows the builtin)

    Returns '' when the provider could not read the string's info bits,
    otherwise '@' followed by the content summary, or by
    'no valid string here' when no summary could be obtained.
    """
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    try:
        content = provider.get_child_at_index(
            provider.get_child_index("content"))
        summary = content.GetSummary()
    except Exception:
        # best effort: any failure while reading the string falls back to the
        # placeholder below (but no longer swallows KeyboardInterrupt/SystemExit)
        summary = None
    if summary is None:
        summary = 'no valid string here'
    return '@' + summary
16
def CFAttributedString_SummaryProvider(valobj, dict):
    """Return the summary string for an NSAttributedString value.

    Reads the pointer stored one address-size past the start of the object,
    treats it as the backing string, and summarizes that string through
    CFStringSynthProvider.

    valobj -- the value to summarize
    dict   -- passed through to CFStringSynthProvider (name kept for
              interface compatibility even though it shadows the builtin)

    Returns '@' followed by the content summary, or by
    'no valid string here' when the string cannot be read.
    """
    fallback = 'no valid string here'
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = fallback
    if pointee is not None and pointee != 0:
        # skip one pointer-sized slot to reach the slot holding the string pointer
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data",
            child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                content = provider.get_child_at_index(
                    provider.get_child_index("content"))
                summary = content.GetSummary()
            except Exception:
                # best effort: fall back to the placeholder, without
                # swallowing KeyboardInterrupt/SystemExit
                summary = fallback
    if summary is None:
        summary = fallback
    return '@' + summary
34
35
def __lldb_init_module(debugger, dict):
    """Register the summary providers defined in this module.

    Issues one "type summary add" command per provider through
    debugger.HandleCommand. The ``dict`` argument is not used here.
    """
    registration_commands = (
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef",
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString",
    )
    for command in registration_commands:
        debugger.HandleCommand(command)
39
class CFStringSynthProvider:
    """Synthetic children provider for CFString (and related NSString) values.

    Exposes six children: five flags decoded from the object's info bits
    ("mutable", "inline", "explicit", "unicode", "special") and a "content"
    child holding the string data. Children other than "content" are for
    debugging only and must not be used in production code.
    """

    # names of the flag children, in child-index order (indices 0..4);
    # the "content" child always comes last
    _FLAG_CHILD_NAMES = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        """Remember the value being inspected and decode its flags.

        valobj -- the value to provide children for
        dict   -- not used by this class
        """
        self.valobj = valobj
        self.update()

    def num_children(self):
        """Return 0 when the info bits could not be read, 6 otherwise."""
        if self.invalid:
            return 0
        return 6

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string from process memory.

        pointer -- address of the first 16-bit code unit
        max_len -- maximum number of 16-bit code units to read; bounds the
                   loop when no terminator is found

        Returns the decoded text. Reading stops at the first all-zero code
        unit, after max_len code units, or at the first failed memory read,
        whichever comes first.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        units_left = max_len
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while units_left > 0:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or not content or len(content) < 2:
                # unreadable memory: keep what was read so far
                break
            unit = bytearray(content)
            if unit[0] == 0 and unit[1] == 0:
                break
            raw.extend(unit[:2])
            pointer = pointer + 2
            units_left = units_left - 1
        # byte order follows the target's endianness
        # (do we really need this or is Cocoa going to
        #  use Windows-compatible little-endian even
        #  if the target is big endian?)
        if self.is_little:
            codec = 'utf-16-le'
        else:
            codec = 'utf-16-be'
        # decoding the collected bytes also combines surrogate pairs;
        # malformed sequences become U+FFFD instead of raising
        return raw.decode(codec, 'replace')

    def _c_string_expression(self, text):
        """Return a '(char*)"..."' expression whose literal spells ``text``.

        Backslashes, double quotes, newlines and carriage returns are escaped
        so that the text cannot terminate or corrupt the C string literal.
        On Python 2 a unicode object is encoded to UTF-8 so that the result
        is a plain str; on Python 3 the text is already a str.
        """
        escaped = text
        for plain, quoted in (('\\', '\\\\'), ('"', '\\"'),
                              ('\n', '\\n'), ('\r', '\\r')):
            escaped = escaped.replace(plain, quoted)
        if not isinstance(escaped, str):
            escaped = escaped.encode('utf-8')
        return '(char*)"' + escaped + '"'

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Return the "content" child for a string flagged as special."""
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        pystr = self.read_unicode(pointer)
        return self.valobj.CreateValueFromExpression(
            "content", self._c_string_expression(pystr))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Return the "content" child built from the object description."""
        return self.valobj.CreateValueFromExpression(
            "content",
            self._c_string_expression(self.valobj.GetObjectDescription()))

    def handle_unicode_string(self):
        """Return the "content" child for a string with the unicode flag set."""
        # step 1: find offset
        pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # not sure why 8 bytes are skipped here
            # (lldb) mem read -c 50 0x00000001001154f0
            # 0x1001154f0: 98 1a 85 71 ff 7f 00 00 90 07 00 00 01 00 00 00  ...q?...........
            # 0x100115500: 03 00 00 00 00 00 00 00 *c3 03 78 00 78 00 00 00  ........?.x.x...
            # 0x100115510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
            # 0x100115520: 00 00                                            ..
            # content begins at * (i.e. 8 bytes into variants, skipping void* buffer in
            # __notInlineImmutable1 entirely, while the length byte is correctly located
            # for an inline string)
            pointer = pointer + 8
        else:
            # read a pointer here and make an address out of it
            try:
                char_ptr_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                # pointer is an absolute address, so the value must be created
                # from that address rather than as a child at a relative offset
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, char_ptr_type)
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                # backslashes are doubled so that the C literal really
                # contains escaped quotes: @"invalid NSString"
                return self.valobj.CreateValueFromExpression(
                    "content", '(char*)"@\\"invalid NSString\\""')
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return self.valobj.CreateValueFromExpression(
            "content", self._c_string_expression(pystr))

    def handle_inline_explicit(self):
        """Return the "content" child as a char* at a fixed offset in the object.

        The offset from the object's address is 24 bytes on 64-bit targets
        and 12 bytes otherwise.
        """
        if self.is_64_bit:
            offset = 24
        else:
            offset = 12
        address = offset + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(address) + ")")

    def handle_mutable_string(self):
        """Return the "content" child for a string with the mutable flag set.

        Reads a char* stored at offset 16 (64-bit) or 8 (otherwise) and
        returns a char* pointing one byte past it.
        """
        if self.is_64_bit:
            offset = 16
        else:
            offset = 8
        char_ptr_type = self.valobj.GetType().GetBasicType(
            lldb.eBasicTypeChar).GetPointerType()
        data = self.valobj.CreateChildAtOffset("content", offset, char_ptr_type)
        # presumably skips a leading length byte -- TODO confirm
        data_value = data.GetValueAsUnsigned(0) + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    def handle_UTF8_inline(self):
        """Return the "content" child for data stored right after the runtime base."""
        address = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if not self.explicit:
            # when the explicit flag is False a length byte precedes the data
            address = address + 1
        char_type = self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)
        return self.valobj.CreateValueFromAddress(
            "content", address, char_type).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return the "content" child as the char* stored after the runtime base."""
        offset = self.size_of_cfruntime_base()
        char_ptr_type = self.valobj.GetType().GetBasicType(
            lldb.eBasicTypeChar).GetPointerType()
        return self.valobj.CreateChildAtOffset("content", offset, char_ptr_type)

    def get_child_at_index(self, index):
        """Return the child at ``index``, or None for any other index.

        Indices 0..4 are the flag children (each rendered as 0 or 1);
        index 5 is the "content" child.
        """
        for position, flag_name in enumerate(self._FLAG_CHILD_NAMES):
            if index == position:
                return self.valobj.CreateValueFromExpression(
                    flag_name, str(int(getattr(self, flag_name))))
        if index != 5:
            return None
        # we are handling the several possible combinations of flags.
        # for each known combination we have a function that knows how to
        # go fetch the data from memory instead of running code. if a string is not
        # correctly displayed, one should start by finding a combination of flags that
        # makes it different from these known cases, and provide a new reader function
        # if this is not possible, a new flag might have to be made up (like the "special" flag
        # below, which is not a real flag in CFString), or alternatively one might need to use
        # the ObjC runtime helper to detect the new class and deal with it accordingly
        if self.mutable:
            return self.handle_mutable_string()
        if self.inline and self.explicit and \
           not self.unicode and not self.special:
            return self.handle_inline_explicit()
        if self.unicode:
            return self.handle_unicode_string()
        if self.special:
            return self.handle_special()
        if self.inline:
            return self.handle_UTF8_inline()
        return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called ``name``.

        "content" is always the last child (num_children() - 1); the flag
        children have fixed indices 0..4. Unknown names yield None.
        """
        if name == "content":
            return self.num_children() - 1
        for position, flag_name in enumerate(self._FLAG_CHILD_NAMES):
            if name == flag_name:
                return position
        return None

    def is_64bit(self):
        """Return True when the process uses 8-byte addresses."""
        return self.valobj.GetTarget().GetProcess().GetAddressByteSize() == 8

    def is_little_endian(self):
        """Return True when the process byte order is little endian."""
        return self.valobj.GetTarget().GetProcess().GetByteOrder() == lldb.eByteOrderLittle

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes of CFRuntimeBase (16 on 64-bit, else 8)."""
        if self.is_64_bit:
            return 8 + 4 + 4
        return 4 + 4

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits inside the object."""
        if self.is_64_bit:
            offset = 8
        else:
            offset = 4
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info-bits byte and record whether that succeeded.

        Sets self.invalid to False and returns the byte as an int when the
        value could be read; otherwise sets self.invalid to True and
        returns None.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar))
        # 11 is presumably lldb.eFormatHex -- TODO confirm; int(info, 0)
        # below auto-detects the base of the rendered value
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is None:
            self.invalid = True
            return None
        self.invalid = False
        return int(info, 0)

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when both bits of mask 0x60 are clear."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        """Return True when the info bits are all zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when bit 0x10 of the info bits is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache the target's pointer width and byte order."""
        self.is_64_bit = self.is_64bit()
        self.is_little = self.is_little_endian()

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the flag attributes from them.

        When the info bits cannot be read the flag attributes are left
        unset (self.invalid is True in that case).
        """
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and the decoded flags."""
        self.adjust_for_architecture()
        self.compute_flags()