1 //===-- ClangExpressionSourceCode.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ClangExpressionSourceCode.h"
10 
11 #include "clang/Basic/CharInfo.h"
12 #include "clang/Basic/FileManager.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Lex/Lexer.h"
15 #include "llvm/ADT/StringRef.h"
16 
17 #include "Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h"
18 #include "Plugins/ExpressionParser/Clang/ClangPersistentVariables.h"
19 #include "lldb/Symbol/Block.h"
20 #include "lldb/Symbol/CompileUnit.h"
21 #include "lldb/Symbol/DebugMacros.h"
22 #include "lldb/Symbol/TypeSystem.h"
23 #include "lldb/Symbol/VariableList.h"
24 #include "lldb/Target/ExecutionContext.h"
25 #include "lldb/Target/Language.h"
26 #include "lldb/Target/Platform.h"
27 #include "lldb/Target/StackFrame.h"
28 #include "lldb/Target/Target.h"
29 #include "lldb/Utility/StreamString.h"
30 
31 using namespace lldb_private;
32 
// Pseudo file names used in the "#line" directives that bracket the generated
// wrapper code: PREFIX_NAME labels the code placed before the user expression
// and SUFFIX_NAME labels the code placed after it.
#define PREFIX_NAME "<lldb wrapper prefix>"
#define SUFFIX_NAME "<lldb wrapper suffix>"

// The file name that the "#line" directive at the start of
// g_expression_prefix assigns to the prefix code.
const llvm::StringRef ClangExpressionSourceCode::g_prefix_file_name = PREFIX_NAME;
37 
// Source text that GetText() emits at the very start of every wrapped
// expression. It opens with a "#line" directive naming PREFIX_NAME and then
// provides:
//   - #ifndef-guarded fallback definitions of offsetof, NULL, Nil, nil, YES
//     and NO (YES/NO rely on a BOOL typedef that GetText() emits separately),
//   - typedefs for the fixed-width integer types, intptr_t/uintptr_t, size_t,
//     ptrdiff_t and unichar,
//   - an extern "C" declaration of printf.
// Everything between R"( and )" is emitted verbatim, so no comments may be
// placed inside the literal.
const char *ClangExpressionSourceCode::g_expression_prefix =
"#line 1 \"" PREFIX_NAME R"("
#ifndef offsetof
#define offsetof(t, d) __builtin_offsetof(t, d)
#endif
#ifndef NULL
#define NULL (__null)
#endif
#ifndef Nil
#define Nil (__null)
#endif
#ifndef nil
#define nil (__null)
#endif
#ifndef YES
#define YES ((BOOL)1)
#endif
#ifndef NO
#define NO ((BOOL)0)
#endif
typedef __INT8_TYPE__ int8_t;
typedef __UINT8_TYPE__ uint8_t;
typedef __INT16_TYPE__ int16_t;
typedef __UINT16_TYPE__ uint16_t;
typedef __INT32_TYPE__ int32_t;
typedef __UINT32_TYPE__ uint32_t;
typedef __INT64_TYPE__ int64_t;
typedef __UINT64_TYPE__ uint64_t;
typedef __INTPTR_TYPE__ intptr_t;
typedef __UINTPTR_TYPE__ uintptr_t;
typedef __SIZE_TYPE__ size_t;
typedef __PTRDIFF_TYPE__ ptrdiff_t;
typedef unsigned short unichar;
extern "C"
{
    int printf(const char * __restrict, ...);
}
)";
76 
// Text placed directly after the user's expression body. It terminates the
// expression with ';' and then emits a "#line" directive naming SUFFIX_NAME.
// The constructor also stores this string as the end marker that
// GetOriginalBodyBounds() searches for.
const char *ClangExpressionSourceCode::g_expression_suffix =
    "\n;\n#line 1 \"" SUFFIX_NAME "\"\n";
79 
80 namespace {
81 
82 class AddMacroState {
83   enum State {
84     CURRENT_FILE_NOT_YET_PUSHED,
85     CURRENT_FILE_PUSHED,
86     CURRENT_FILE_POPPED
87   };
88 
89 public:
90   AddMacroState(const FileSpec &current_file, const uint32_t current_file_line)
91       : m_current_file(current_file), m_current_file_line(current_file_line) {}
92 
93   void StartFile(const FileSpec &file) {
94     m_file_stack.push_back(file);
95     if (file == m_current_file)
96       m_state = CURRENT_FILE_PUSHED;
97   }
98 
99   void EndFile() {
100     if (m_file_stack.size() == 0)
101       return;
102 
103     FileSpec old_top = m_file_stack.back();
104     m_file_stack.pop_back();
105     if (old_top == m_current_file)
106       m_state = CURRENT_FILE_POPPED;
107   }
108 
109   // An entry is valid if it occurs before the current line in the current
110   // file.
111   bool IsValidEntry(uint32_t line) {
112     switch (m_state) {
113     case CURRENT_FILE_NOT_YET_PUSHED:
114       return true;
115     case CURRENT_FILE_PUSHED:
116       // If we are in file included in the current file, the entry should be
117       // added.
118       if (m_file_stack.back() != m_current_file)
119         return true;
120 
121       return line < m_current_file_line;
122     default:
123       return false;
124     }
125   }
126 
127 private:
128   std::vector<FileSpec> m_file_stack;
129   State m_state = CURRENT_FILE_NOT_YET_PUSHED;
130   FileSpec m_current_file;
131   uint32_t m_current_file_line;
132 };
133 
134 } // anonymous namespace
135 
136 static void AddMacros(const DebugMacros *dm, CompileUnit *comp_unit,
137                       AddMacroState &state, StreamString &stream) {
138   if (dm == nullptr)
139     return;
140 
141   for (size_t i = 0; i < dm->GetNumMacroEntries(); i++) {
142     const DebugMacroEntry &entry = dm->GetMacroEntryAtIndex(i);
143     uint32_t line;
144 
145     switch (entry.GetType()) {
146     case DebugMacroEntry::DEFINE:
147       if (state.IsValidEntry(entry.GetLineNumber()))
148         stream.Printf("#define %s\n", entry.GetMacroString().AsCString());
149       else
150         return;
151       break;
152     case DebugMacroEntry::UNDEF:
153       if (state.IsValidEntry(entry.GetLineNumber()))
154         stream.Printf("#undef %s\n", entry.GetMacroString().AsCString());
155       else
156         return;
157       break;
158     case DebugMacroEntry::START_FILE:
159       line = entry.GetLineNumber();
160       if (state.IsValidEntry(line))
161         state.StartFile(entry.GetFileSpec(comp_unit));
162       else
163         return;
164       break;
165     case DebugMacroEntry::END_FILE:
166       state.EndFile();
167       break;
168     case DebugMacroEntry::INDIRECT:
169       AddMacros(entry.GetIndirectDebugMacros(), comp_unit, state, stream);
170       break;
171     default:
172       // This is an unknown/invalid entry. Ignore.
173       break;
174     }
175   }
176 }
177 
178 lldb_private::ClangExpressionSourceCode::ClangExpressionSourceCode(
179     llvm::StringRef filename, llvm::StringRef name, llvm::StringRef prefix,
180     llvm::StringRef body, Wrapping wrap, WrapKind wrap_kind)
181     : ExpressionSourceCode(name, prefix, body, wrap), m_wrap_kind(wrap_kind) {
182   // Use #line markers to pretend that we have a single-line source file
183   // containing only the user expression. This will hide our wrapper code
184   // from the user when we render diagnostics with Clang.
185   m_start_marker = "#line 1 \"" + filename.str() + "\"\n";
186   m_end_marker = g_expression_suffix;
187 }
188 
189 namespace {
190 /// Allows checking if a token is contained in a given expression.
191 class TokenVerifier {
192   /// The tokens we found in the expression.
193   llvm::StringSet<> m_tokens;
194 
195 public:
196   TokenVerifier(std::string body);
197   /// Returns true iff the given expression body contained a token with the
198   /// given content.
199   bool hasToken(llvm::StringRef token) const {
200     return m_tokens.find(token) != m_tokens.end();
201   }
202 };
203 } // namespace
204 
205 TokenVerifier::TokenVerifier(std::string body) {
206   using namespace clang;
207 
208   // We only care about tokens and not their original source locations. If we
209   // move the whole expression to only be in one line we can simplify the
210   // following code that extracts the token contents.
211   std::replace(body.begin(), body.end(), '\n', ' ');
212   std::replace(body.begin(), body.end(), '\r', ' ');
213 
214   FileSystemOptions file_opts;
215   FileManager file_mgr(file_opts,
216                        FileSystem::Instance().GetVirtualFileSystem());
217 
218   // Let's build the actual source code Clang needs and setup some utility
219   // objects.
220   llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
221   llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
222       new DiagnosticOptions());
223   DiagnosticsEngine diags(diag_ids, diags_opts);
224   clang::SourceManager SM(diags, file_mgr);
225   auto buf = llvm::MemoryBuffer::getMemBuffer(body);
226 
227   FileID FID = SM.createFileID(buf->getMemBufferRef());
228 
229   // Let's just enable the latest ObjC and C++ which should get most tokens
230   // right.
231   LangOptions Opts;
232   Opts.ObjC = true;
233   Opts.DollarIdents = true;
234   Opts.CPlusPlus17 = true;
235   Opts.LineComment = true;
236 
237   Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
238 
239   Token token;
240   bool exit = false;
241   while (!exit) {
242     // Returns true if this is the last token we get from the lexer.
243     exit = lex.LexFromRawLexer(token);
244 
245     // Extract the column number which we need to extract the token content.
246     // Our expression is just one line, so we don't need to handle any line
247     // numbers here.
248     bool invalid = false;
249     unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
250     if (invalid)
251       continue;
252     // Column numbers start at 1, but indexes in our string start at 0.
253     --start;
254 
255     // Annotations don't have a length, so let's skip them.
256     if (token.isAnnotation())
257       continue;
258 
259     // Extract the token string from our source code and store it.
260     std::string token_str = body.substr(start, token.getLength());
261     if (token_str.empty())
262       continue;
263     m_tokens.insert(token_str);
264   }
265 }
266 
267 void ClangExpressionSourceCode::AddLocalVariableDecls(
268     const lldb::VariableListSP &var_list_sp, StreamString &stream,
269     const std::string &expr) const {
270   TokenVerifier tokens(expr);
271 
272   for (size_t i = 0; i < var_list_sp->GetSize(); i++) {
273     lldb::VariableSP var_sp = var_list_sp->GetVariableAtIndex(i);
274 
275     ConstString var_name = var_sp->GetName();
276 
277 
278     // We can check for .block_descriptor w/o checking for langauge since this
279     // is not a valid identifier in either C or C++.
280     if (!var_name || var_name == ".block_descriptor")
281       continue;
282 
283     if (!expr.empty() && !tokens.hasToken(var_name.GetStringRef()))
284       continue;
285 
286     const bool is_objc = m_wrap_kind == WrapKind::ObjCInstanceMethod ||
287                          m_wrap_kind == WrapKind::ObjCStaticMethod;
288     if ((var_name == "self" || var_name == "_cmd") && is_objc)
289       continue;
290 
291     if (var_name == "this" && m_wrap_kind == WrapKind::CppMemberFunction)
292       continue;
293 
294     stream.Printf("using $__lldb_local_vars::%s;\n", var_name.AsCString());
295   }
296 }
297 
298 bool ClangExpressionSourceCode::GetText(
299     std::string &text, ExecutionContext &exe_ctx, bool add_locals,
300     bool force_add_all_locals, llvm::ArrayRef<std::string> modules) const {
301   const char *target_specific_defines = "typedef signed char BOOL;\n";
302   std::string module_macros;
303   llvm::raw_string_ostream module_macros_stream(module_macros);
304 
305   Target *target = exe_ctx.GetTargetPtr();
306   if (target) {
307     if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
308         target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
309       target_specific_defines = "typedef bool BOOL;\n";
310     }
311     if (target->GetArchitecture().GetMachine() == llvm::Triple::x86_64) {
312       if (lldb::PlatformSP platform_sp = target->GetPlatform()) {
313         if (platform_sp->GetPluginName() == "ios-simulator") {
314           target_specific_defines = "typedef bool BOOL;\n";
315         }
316       }
317     }
318 
319     auto *persistent_vars = llvm::cast<ClangPersistentVariables>(
320         target->GetPersistentExpressionStateForLanguage(lldb::eLanguageTypeC));
321     std::shared_ptr<ClangModulesDeclVendor> decl_vendor =
322         persistent_vars->GetClangModulesDeclVendor();
323     if (decl_vendor) {
324       const ClangModulesDeclVendor::ModuleVector &hand_imported_modules =
325           persistent_vars->GetHandLoadedClangModules();
326       ClangModulesDeclVendor::ModuleVector modules_for_macros;
327 
328       for (ClangModulesDeclVendor::ModuleID module : hand_imported_modules) {
329         modules_for_macros.push_back(module);
330       }
331 
332       if (target->GetEnableAutoImportClangModules()) {
333         if (StackFrame *frame = exe_ctx.GetFramePtr()) {
334           if (Block *block = frame->GetFrameBlock()) {
335             SymbolContext sc;
336 
337             block->CalculateSymbolContext(&sc);
338 
339             if (sc.comp_unit) {
340               StreamString error_stream;
341 
342               decl_vendor->AddModulesForCompileUnit(
343                   *sc.comp_unit, modules_for_macros, error_stream);
344             }
345           }
346         }
347       }
348 
349       decl_vendor->ForEachMacro(
350           modules_for_macros,
351           [&module_macros_stream](llvm::StringRef token,
352                                   llvm::StringRef expansion) -> bool {
353             // Check if the macro hasn't already been defined in the
354             // g_expression_prefix (which defines a few builtin macros).
355             module_macros_stream << "#ifndef " << token << "\n";
356             module_macros_stream << expansion << "\n";
357             module_macros_stream << "#endif\n";
358             return false;
359           });
360     }
361   }
362 
363   StreamString debug_macros_stream;
364   StreamString lldb_local_var_decls;
365   if (StackFrame *frame = exe_ctx.GetFramePtr()) {
366     const SymbolContext &sc = frame->GetSymbolContext(
367         lldb::eSymbolContextCompUnit | lldb::eSymbolContextLineEntry);
368 
369     if (sc.comp_unit && sc.line_entry.IsValid()) {
370       DebugMacros *dm = sc.comp_unit->GetDebugMacros();
371       if (dm) {
372         AddMacroState state(sc.line_entry.file, sc.line_entry.line);
373         AddMacros(dm, sc.comp_unit, state, debug_macros_stream);
374       }
375     }
376 
377     if (add_locals)
378       if (target->GetInjectLocalVariables(&exe_ctx)) {
379         lldb::VariableListSP var_list_sp =
380             frame->GetInScopeVariableList(false, true);
381         AddLocalVariableDecls(var_list_sp, lldb_local_var_decls,
382                               force_add_all_locals ? "" : m_body);
383       }
384   }
385 
386   if (m_wrap) {
387     // Generate a list of @import statements that will import the specified
388     // module into our expression.
389     std::string module_imports;
390     for (const std::string &module : modules) {
391       module_imports.append("@import ");
392       module_imports.append(module);
393       module_imports.append(";\n");
394     }
395 
396     StreamString wrap_stream;
397 
398     wrap_stream.Printf("%s\n%s\n%s\n%s\n%s\n", g_expression_prefix,
399                        module_macros.c_str(), debug_macros_stream.GetData(),
400                        target_specific_defines, m_prefix.c_str());
401 
402     // First construct a tagged form of the user expression so we can find it
403     // later:
404     std::string tagged_body;
405     tagged_body.append(m_start_marker);
406     tagged_body.append(m_body);
407     tagged_body.append(m_end_marker);
408 
409     switch (m_wrap_kind) {
410     case WrapKind::Function:
411       wrap_stream.Printf("%s"
412                          "void                           \n"
413                          "%s(void *$__lldb_arg)          \n"
414                          "{                              \n"
415                          "    %s;                        \n"
416                          "%s"
417                          "}                              \n",
418                          module_imports.c_str(), m_name.c_str(),
419                          lldb_local_var_decls.GetData(), tagged_body.c_str());
420       break;
421     case WrapKind::CppMemberFunction:
422       wrap_stream.Printf("%s"
423                          "void                                   \n"
424                          "$__lldb_class::%s(void *$__lldb_arg)   \n"
425                          "{                                      \n"
426                          "    %s;                                \n"
427                          "%s"
428                          "}                                      \n",
429                          module_imports.c_str(), m_name.c_str(),
430                          lldb_local_var_decls.GetData(), tagged_body.c_str());
431       break;
432     case WrapKind::ObjCInstanceMethod:
433       wrap_stream.Printf(
434           "%s"
435           "@interface $__lldb_objc_class ($__lldb_category)       \n"
436           "-(void)%s:(void *)$__lldb_arg;                         \n"
437           "@end                                                   \n"
438           "@implementation $__lldb_objc_class ($__lldb_category)  \n"
439           "-(void)%s:(void *)$__lldb_arg                          \n"
440           "{                                                      \n"
441           "    %s;                                                \n"
442           "%s"
443           "}                                                      \n"
444           "@end                                                   \n",
445           module_imports.c_str(), m_name.c_str(), m_name.c_str(),
446           lldb_local_var_decls.GetData(), tagged_body.c_str());
447       break;
448 
449     case WrapKind::ObjCStaticMethod:
450       wrap_stream.Printf(
451           "%s"
452           "@interface $__lldb_objc_class ($__lldb_category)        \n"
453           "+(void)%s:(void *)$__lldb_arg;                          \n"
454           "@end                                                    \n"
455           "@implementation $__lldb_objc_class ($__lldb_category)   \n"
456           "+(void)%s:(void *)$__lldb_arg                           \n"
457           "{                                                       \n"
458           "    %s;                                                 \n"
459           "%s"
460           "}                                                       \n"
461           "@end                                                    \n",
462           module_imports.c_str(), m_name.c_str(), m_name.c_str(),
463           lldb_local_var_decls.GetData(), tagged_body.c_str());
464       break;
465     }
466 
467     text = std::string(wrap_stream.GetString());
468   } else {
469     text.append(m_body);
470   }
471 
472   return true;
473 }
474 
475 bool ClangExpressionSourceCode::GetOriginalBodyBounds(
476     std::string transformed_text, size_t &start_loc, size_t &end_loc) {
477   start_loc = transformed_text.find(m_start_marker);
478   if (start_loc == std::string::npos)
479     return false;
480   start_loc += m_start_marker.size();
481   end_loc = transformed_text.find(m_end_marker);
482   return end_loc != std::string::npos;
483 }
484