xref: /llvm-project/lldb/examples/summaries/cocoa/CFString.py (revision 896cd1d3e675b1c7be05be9975b09df1bafca9eb)
1# synthetic children and summary provider for CFString
2# (and related NSString class)
3import lldb
4import objc_runtime
5
def CFString_SummaryProvider(valobj, dict):
    """Summary provider for NSString / CFStringRef / CFMutableStringRef values.

    Builds a CFStringSynthProvider on top of ``valobj`` and uses the summary
    of its "content" child as the text of the string.

    Returns:
        '' when the synthetic provider reports the value as invalid;
        otherwise '@' followed by the summary of the "content" child, or
        '@no valid string here' when that summary cannot be obtained.
    """
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    try:
        content = provider.get_child_at_index(provider.get_child_index("content"))
        summary = content.GetSummary()
    except Exception:
        # Best effort: any failure while reading the string degrades to the
        # placeholder text below.  KeyboardInterrupt/SystemExit are
        # deliberately not swallowed.
        summary = None
    if summary is None:
        summary = 'no valid string here'
    return '@' + summary
17
def CFAttributedString_SummaryProvider(valobj, dict):
    """Summary provider for NSAttributedString values.

    Reads a pointer stored one address-size past the start of the object
    (presumably the backing string right after the isa pointer -- TODO
    confirm against the NSAttributedString layout) and summarizes it with
    CFStringSynthProvider.

    Returns:
        '@' followed by the summary of the backing string, or
        '@no valid string here' when the string cannot be read.
    """
    fallback = 'no valid string here'
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = fallback
    if pointee is not None and pointee != 0:
        pointee = pointee + offset
        # value located at (object address + pointer size) ...
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        # ... whose own value is used as the address of the string data
        child = child_ptr.CreateValueFromAddress(
            "string_data", child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                content = provider.get_child_at_index(
                    provider.get_child_index("content"))
                summary = content.GetSummary()
            except Exception:
                # Best effort: fall back to the placeholder text, but do not
                # swallow KeyboardInterrupt/SystemExit.
                summary = fallback
    if summary is None:
        summary = fallback
    return '@' + summary
35
36
def __lldb_init_module(debugger, dict):
    """Register this module's summary providers with the debugger.

    Issues one "type summary add -F" command per provider function, each
    followed by the type names that provider is attached to.
    """
    registrations = (
        ("CFString.CFString_SummaryProvider",
         "NSString CFStringRef CFMutableStringRef"),
        ("CFString.CFAttributedString_SummaryProvider",
         "NSAttributedString"),
    )
    for function_name, type_names in registrations:
        debugger.HandleCommand(
            "type summary add -F " + function_name + " " + type_names)
40
class CFStringSynthProvider:
    """Synthetic children provider for CFString (and related NSString) values.

    The provider reads one "info bits" byte out of the object, derives a set
    of boolean flags from it and exposes six children:

        0 "mutable", 1 "inline", 2 "explicit", 3 "unicode", 4 "special"
            the decoded flags (for debugging only)
        5 "content"
            the text of the string, as a char-based value

    ``self.invalid`` is True when the info byte could not be read; in that
    case ``num_children()`` reports 0 children.
    """

    # names of the flag children, in child-index order; "content" comes last
    _FLAG_CHILDREN = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        self.valobj = valobj
        self.update()

    # children other than "content" are for debugging only and must not be
    # used in production code
    def num_children(self):
        """Return 0 for an invalid object, 6 otherwise."""
        if self.invalid:
            return 0
        return 6

    def _c_string_expression(self, text):
        """Return the expression text ``(char*)"<text>"`` for ``text``.

        Backslashes, double quotes, newlines and carriage returns are
        escaped so that the contents of the string cannot terminate the
        literal early or alter the evaluated expression.
        """
        if not isinstance(text, str):
            # Python 2 unicode object: turn it into a UTF-8 byte string
            # before it is spliced into the expression.
            text = text.encode('utf-8')
        # the backslash must be escaped first, otherwise the backslashes
        # added by the later replacements would be doubled
        for raw, escaped in (('\\', '\\\\'), ('"', '\\"'),
                             ('\n', '\\n'), ('\r', '\\r')):
            text = text.replace(raw, escaped)
        return '(char*)"' + text + '"'

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string from process memory.

        Args:
            pointer: address of the first 16-bit code unit.
            max_len: maximum number of code units to read, so that a missing
                terminator cannot make the loop run forever.

        Returns:
            The decoded text.  Reading stops at the first 0x0000 code unit,
            at the first failed memory read, or after ``max_len`` code
            units, whichever comes first.  Code units that do not form valid
            UTF-16 are replaced by U+FFFD.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        remaining = max_len
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while remaining > 0:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or content is None or len(content) < 2:
                break
            new_bytes = bytearray(content)
            if new_bytes[0] == 0 and new_bytes[1] == 0:
                break
            raw.extend(new_bytes[:2])
            pointer = pointer + 2
            remaining -= 1
        # pick the byte order of the target
        # (do we really need this or is Cocoa going to
        #  use Windows-compatible little-endian even
        #  if the target is big endian?)
        if self.is_little:
            codec = 'utf-16-le'
        else:
            codec = 'utf-16-be'
        return raw.decode(codec, 'replace')

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Return the "content" child for a string with the special flag."""
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        pystr = self.read_unicode(pointer)
        return self.valobj.CreateValueFromExpression(
            "content", self._c_string_expression(pystr))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Return a "content" child built from the object description."""
        return self.valobj.CreateValueFromExpression(
            "content",
            self._c_string_expression(self.valobj.GetObjectDescription()))

    def handle_unicode_string(self):
        """Return the "content" child for a string with the unicode flag."""
        # step 1: find the address of the UTF-16 data
        pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # not sure why 8 bytes are skipped here
            # (lldb) mem read -c 50 0x00000001001154f0
            # 0x1001154f0: 98 1a 85 71 ff 7f 00 00 90 07 00 00 01 00 00 00  ...q?...........
            # 0x100115500: 03 00 00 00 00 00 00 00 *c3 03 78 00 78 00 00 00  ........?.x.x...
            # 0x100115510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
            # 0x100115520: 00 00                                            ..
            # content begins at * (i.e. 8 bytes into variants, skipping void* buffer in
            # __notInlineImmutable1 entirely, while the length byte is correctly located
            # for an inline string)
            # on NMOS in 32 bit mode, we need to skip 4 bytes instead
            # if the same occurs on Lion, then this simply needs to be pointer + pointer_size
            if self.is_64_bit == False and \
               objc_runtime.Utilities.check_is_osx_lion(self.valobj.GetTarget()) == False:
                pointer = pointer + 4
            else:
                pointer = pointer + 8
        else:
            # read a pointer-sized value here and make an address out of it
            try:
                char_ptr_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                # `pointer` is an absolute address, so the value has to be
                # created from that address rather than as a child at an
                # offset from valobj
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, char_ptr_type)
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                return self.valobj.CreateValueFromExpression(
                    "content",
                    self._c_string_expression('@"invalid NSString"'))
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return self.valobj.CreateValueFromExpression(
            "content", self._c_string_expression(pystr))

    def handle_inline_explicit(self):
        """Return "content" as a char* at a fixed offset into the object.

        The offset is 24 bytes on 64-bit targets and 12 bytes otherwise.
        """
        if self.is_64_bit:
            offset = 24
        else:
            offset = 12
        offset = offset + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(offset) + ")")

    def handle_mutable_string(self):
        """Return "content" for a mutable string.

        Reads a pointer stored at offset 16 (64-bit) or 8 (32-bit) and
        points one byte past it (presumably skipping a length byte --
        TODO confirm against CFString.c).
        """
        if self.is_64_bit:
            offset = 16
        else:
            offset = 8
        data = self.valobj.CreateChildAtOffset(
            "content", offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType())
        data_value = data.GetValueAsUnsigned(0)
        data_value = data_value + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    def handle_UTF8_inline(self):
        """Return "content" for 8-bit data stored right after CFRuntimeBase.

        One extra byte is skipped when the explicit flag is False.
        """
        offset = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base()
        if not self.explicit:
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content", offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return "content" as the char* child stored right after CFRuntimeBase."""
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content", offset,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType())

    def get_child_at_index(self, index):
        """Return the synthetic child at ``index``.

        Indices 0-4 are the flag children (their value is 0 or 1), index 5
        is "content".  Any other index yields None.
        """
        for position, name in enumerate(self._FLAG_CHILDREN):
            if index == position:
                return self.valobj.CreateValueFromExpression(
                    name, str(int(getattr(self, name))))
        if index == 5:
            # we are handling the several possible combinations of flags.
            # for each known combination we have a function that knows how to
            # go fetch the data from memory instead of running code. if a string is not
            # correctly displayed, one should start by finding a combination of flags that
            # makes it different from these known cases, and provide a new reader function
            # if this is not possible, a new flag might have to be made up (like the "special" flag
            # below, which is not a real flag in CFString), or alternatively one might need to use
            # the ObjC runtime helper to detect the new class and deal with it accordingly
            if self.mutable:
                return self.handle_mutable_string()
            if self.inline and self.explicit and \
               not self.unicode and not self.special:
                return self.handle_inline_explicit()
            if self.unicode:
                return self.handle_unicode_string()
            if self.special:
                return self.handle_special()
            if self.inline:
                return self.handle_UTF8_inline()
            return self.handle_UTF8_not_inline()
        return None

    def get_child_index(self, name):
        """Return the index of the child called ``name``.

        "content" is always the last child (``num_children() - 1``); an
        unknown name yields None.
        """
        if name == "content":
            return self.num_children() - 1
        if name in self._FLAG_CHILDREN:
            return self._FLAG_CHILDREN.index(name)
        return None

    def is_64bit(self):
        """Return True when the process uses 8-byte addresses."""
        return self.valobj.GetTarget().GetProcess().GetAddressByteSize() == 8

    def is_little_endian(self):
        """Return True when the process byte order is little endian."""
        return self.valobj.GetTarget().GetProcess().GetByteOrder() == lldb.eByteOrderLittle

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size used for CFRuntimeBase: 16 on 64-bit, else 8."""
        if self.is_64_bit:
            return 8 + 4 + 4
        return 4 + 4

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the offset of the info byte inside the object."""
        if self.is_64_bit:
            offset = 8
        else:
            offset = 4
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info byte and record whether the read succeeded.

        Sets ``self.invalid`` and returns the byte as an int, or None when
        the value could not be read.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar))
        # 11 is presumably lldb.eFormatHex -- TODO confirm; the value is
        # parsed below with int(info, 0), which accepts a 0x prefix
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is not None:
            self.invalid = False
            return int(info, 0)
        self.invalid = True
        return None

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when none of the 0x60 bits are set."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        """Return True when the info bits are all zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when the 0x10 bit is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache the pointer width and byte order of the process."""
        self.is_64_bit = self.is_64bit()
        self.is_little = self.is_little_endian()

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the flag attributes from them.

        All flags are reset to False first, so an object whose info byte
        cannot be read never exposes stale or undefined flags.
        """
        self.mutable = False
        self.inline = False
        self.explicit = False
        self.unicode = False
        self.special = False
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and the string flags."""
        self.adjust_for_architecture()
        self.compute_flags()