xref: /llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h (revision 0f783599a4c645d8ae826f990f7b938fac6e5dae)
1 //===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 //
10 // !! FIXME FIXME FIXME !!
11 //
12 // Python APIs nearly all can return an exception.   They do this
13 // by returning NULL, or -1, or some such value and setting
14 // the exception state with PyErr_Set*().   Exceptions must be
15 // handled before further python API functions are called.   Failure
16 // to do so will result in asserts on debug builds of python.
17 // It will also sometimes, but not usually result in crashes of
18 // release builds.
19 //
20 // Nearly all the code in this header does not handle python exceptions
21 // correctly.  It should all be converted to return Expected<> or
22 // Error types to capture the exception.
23 //
24 // Everything in this file except functions that return Error or
25 // Expected<> is considered deprecated and should not be
26 // used in new code.  If you need to use it, fix it first.
27 //
28 //
29 // TODOs for this file
30 //
31 // * Make all methods safe for exceptions.
32 //
33 // * Eliminate method signatures that must translate exceptions into
34 //   empty objects or NULLs.   Almost everything here should return
35 //   Expected<>.   It should be acceptable for certain operations that
36 //   can never fail to assert instead, such as the creation of
37 //   PythonString from a string literal.
38 //
39 // * Eliminate Reset(), and make all non-default constructors private.
40 //   Python objects should be created with Retain<> or Take<>, and they
41 //   should be assigned with operator=
42 //
43 // * Eliminate default constructors, make python objects always
44 //   nonnull, and use optionals where necessary.
45 //
46 
47 
48 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
49 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
50 
51 #ifndef LLDB_DISABLE_PYTHON
52 
53 // LLDB Python header must be included first
54 #include "lldb-python.h"
55 
56 #include "lldb/Host/File.h"
57 #include "lldb/Utility/StructuredData.h"
58 
59 #include "llvm/ADT/ArrayRef.h"
60 
61 namespace lldb_private {
62 
63 class PythonObject;
64 class PythonBytes;
65 class PythonString;
66 class PythonList;
67 class PythonDictionary;
68 class PythonInteger;
69 class PythonException;
70 
71 class StructuredPythonObject : public StructuredData::Generic {
72 public:
73   StructuredPythonObject() : StructuredData::Generic() {}
74 
75   StructuredPythonObject(void *obj) : StructuredData::Generic(obj) {
76     Py_XINCREF(GetValue());
77   }
78 
79   ~StructuredPythonObject() override {
80     if (Py_IsInitialized())
81       Py_XDECREF(GetValue());
82     SetValue(nullptr);
83   }
84 
85   bool IsValid() const override { return GetValue() && GetValue() != Py_None; }
86 
87   void Serialize(llvm::json::OStream &s) const override;
88 
89 private:
90   DISALLOW_COPY_AND_ASSIGN(StructuredPythonObject);
91 };
92 
// Classification of a wrapped Python object, as reported by
// PythonObject::GetObjectType().  The enumerator order (and therefore the
// underlying values) must not change.
enum class PyObjectType {
  Unknown,    // Not one of the kinds listed below.
  None,
  Boolean,
  Integer,
  Dictionary,
  List,
  String,
  Bytes,
  ByteArray,
  Module,
  Callable,
  Tuple,
  File
};
108 
// Describes who owns the reference behind an incoming PyObject *.
enum class PyRefType {
  // We are not given ownership of the incoming PyObject.  We cannot safely
  // hold it without calling Py_INCREF.
  Borrowed,
  // We have ownership of the incoming PyObject.  We should not call
  // Py_INCREF.
  Owned
};
115 
116 namespace python {
117 
118 // Take a reference that you already own, and turn it into
119 // a PythonObject.
120 //
121 // Most python API methods will return a +1 reference
122 // if they succeed or NULL if and only if
123 // they set an exception.   Use this to collect such return
124 // values, after checking for NULL.
125 //
126 // If T is not just PythonObject, then obj must be already be
127 // checked to be of the correct type.
128 template <typename T> T Take(PyObject *obj) {
129   assert(obj);
130   assert(!PyErr_Occurred());
131   T thing(PyRefType::Owned, obj);
132   assert(thing.IsValid());
133   return std::move(thing);
134 }
135 
136 // Retain a reference you have borrowed, and turn it into
137 // a PythonObject.
138 //
139 // A minority of python APIs return a borrowed reference
140 // instead of a +1.   They will also return NULL if and only
141 // if they set an exception.   Use this to collect such return
142 // values, after checking for NULL.
143 //
144 // If T is not just PythonObject, then obj must be already be
145 // checked to be of the correct type.
146 template <typename T> T Retain(PyObject *obj) {
147   assert(obj);
148   assert(!PyErr_Occurred());
149   T thing(PyRefType::Borrowed, obj);
150   assert(thing.IsValid());
151   return std::move(thing);
152 }
153 
154 } // namespace python
155 
// Selector accepted by the explicit PythonList, PythonTuple and
// PythonDictionary constructors.  NOTE(review): presumably Invalid leaves the
// wrapper null and Empty creates an empty container -- the constructors are
// defined out of line, confirm there.
enum class PyInitialValue {
  Invalid,
  Empty
};
157 
// Maps a C++ argument type onto the format character that
// PythonObject::CallMethod places in the format string it hands to
// PyObject_CallMethod, plus a get() that yields the value to pass for it.
// Only the specializations are defined; the second template parameter is a
// hook for enable_if-based partial specializations.
template <typename T, typename Enable = void> struct PythonFormat;

// unsigned long long is passed through unchanged with format 'K'.
template <> struct PythonFormat<unsigned long long> {
  static constexpr char format = 'K';
  static unsigned long long get(unsigned long long value) { return value; }
};

// long long is passed through unchanged with format 'L'.
template <> struct PythonFormat<long long> {
  static constexpr char format = 'L';
  static long long get(long long value) { return value; }
};
169 
170 template <typename T>
171 struct PythonFormat<
172     T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> {
173   static constexpr char format = 'O';
174   static auto get(const T &value) { return value.get(); }
175 };
176 
177 class PythonObject {
178 public:
179   PythonObject() : m_py_obj(nullptr) {}
180 
181   PythonObject(PyRefType type, PyObject *py_obj) : m_py_obj(nullptr) {
182     Reset(type, py_obj);
183   }
184 
185   PythonObject(const PythonObject &rhs) : m_py_obj(nullptr) { Reset(rhs); }
186 
187   PythonObject(PythonObject &&rhs) {
188     m_py_obj = rhs.m_py_obj;
189     rhs.m_py_obj = nullptr;
190   }
191 
192   ~PythonObject() { Reset(); }
193 
194   void Reset() {
195     if (m_py_obj && Py_IsInitialized())
196       Py_DECREF(m_py_obj);
197     m_py_obj = nullptr;
198   }
199 
200   void Reset(const PythonObject &rhs) {
201     if (!rhs.IsValid())
202       Reset();
203     else
204       Reset(PyRefType::Borrowed, rhs.m_py_obj);
205   }
206 
207   // PythonObject is implicitly convertible to PyObject *, which will call the
208   // wrong overload.  We want to explicitly disallow this, since a PyObject
209   // *always* owns its reference.  Therefore the overload which takes a
210   // PyRefType doesn't make sense, and the copy constructor should be used.
211   void Reset(PyRefType type, const PythonObject &ref) = delete;
212 
213   void Reset(PyRefType type, PyObject *py_obj) {
214     if (py_obj == m_py_obj)
215       return;
216 
217     if (Py_IsInitialized())
218       Py_XDECREF(m_py_obj);
219 
220     m_py_obj = py_obj;
221 
222     // If this is a borrowed reference, we need to convert it to
223     // an owned reference by incrementing it.  If it is an owned
224     // reference (for example the caller allocated it with PyDict_New()
225     // then we must *not* increment it.
226     if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed)
227       Py_XINCREF(m_py_obj);
228   }
229 
230   void Dump() const {
231     if (m_py_obj)
232       _PyObject_Dump(m_py_obj);
233     else
234       puts("NULL");
235   }
236 
237   void Dump(Stream &strm) const;
238 
239   PyObject *get() const { return m_py_obj; }
240 
241   PyObject *release() {
242     PyObject *result = m_py_obj;
243     m_py_obj = nullptr;
244     return result;
245   }
246 
247   PythonObject &operator=(const PythonObject &other) {
248     Reset(PyRefType::Borrowed, other.get());
249     return *this;
250   }
251 
252   void Reset(PythonObject &&other) {
253     Reset();
254     m_py_obj = other.m_py_obj;
255     other.m_py_obj = nullptr;
256   }
257 
258   PythonObject &operator=(PythonObject &&other) {
259     Reset(std::move(other));
260     return *this;
261   }
262 
263   PyObjectType GetObjectType() const;
264 
265   PythonString Repr() const;
266 
267   PythonString Str() const;
268 
269   static PythonObject ResolveNameWithDictionary(llvm::StringRef name,
270                                                 const PythonDictionary &dict);
271 
272   template <typename T>
273   static T ResolveNameWithDictionary(llvm::StringRef name,
274                                      const PythonDictionary &dict) {
275     return ResolveNameWithDictionary(name, dict).AsType<T>();
276   }
277 
278   PythonObject ResolveName(llvm::StringRef name) const;
279 
280   template <typename T> T ResolveName(llvm::StringRef name) const {
281     return ResolveName(name).AsType<T>();
282   }
283 
284   bool HasAttribute(llvm::StringRef attribute) const;
285 
286   PythonObject GetAttributeValue(llvm::StringRef attribute) const;
287 
288   bool IsNone() const { return m_py_obj == Py_None; }
289 
290   bool IsValid() const { return m_py_obj != nullptr; }
291 
292   bool IsAllocated() const { return IsValid() && !IsNone(); }
293 
294   explicit operator bool() const { return IsValid() && !IsNone(); }
295 
296   template <typename T> T AsType() const {
297     if (!T::Check(m_py_obj))
298       return T();
299     return T(PyRefType::Borrowed, m_py_obj);
300   }
301 
302   StructuredData::ObjectSP CreateStructuredObject() const;
303 
304 protected:
305   static llvm::Error nullDeref() {
306     return llvm::createStringError(llvm::inconvertibleErrorCode(),
307                                    "A NULL PyObject* was dereferenced");
308   }
309   static llvm::Error exception(const char *s = nullptr) {
310     return llvm::make_error<PythonException>(s);
311   }
312 
313 public:
314   template <typename... T>
315   llvm::Expected<PythonObject> CallMethod(const char *name,
316                                           const T &... t) const {
317     const char format[] = {'(', PythonFormat<T>::format..., ')', 0};
318 #if PY_MAJOR_VERSION < 3
319     PyObject *obj = PyObject_CallMethod(m_py_obj, const_cast<char *>(name),
320                                         const_cast<char *>(format),
321                                         PythonFormat<T>::get(t)...);
322 #else
323     PyObject *obj =
324         PyObject_CallMethod(m_py_obj, name, format, PythonFormat<T>::get(t)...);
325 #endif
326     if (!obj)
327       return exception();
328     return python::Take<PythonObject>(obj);
329   }
330 
331   llvm::Expected<PythonObject> GetAttribute(const char *name) const {
332     if (!m_py_obj)
333       return nullDeref();
334     PyObject *obj = PyObject_GetAttrString(m_py_obj, name);
335     if (!obj)
336       return exception();
337     return python::Take<PythonObject>(obj);
338   }
339 
340   llvm::Expected<bool> IsTrue() {
341     if (!m_py_obj)
342       return nullDeref();
343     int r = PyObject_IsTrue(m_py_obj);
344     if (r < 0)
345       return exception();
346     return !!r;
347   }
348 
349   llvm::Expected<long long> AsLongLong() {
350     if (!m_py_obj)
351       return nullDeref();
352     assert(!PyErr_Occurred());
353     long long r = PyLong_AsLongLong(m_py_obj);
354     if (PyErr_Occurred())
355       return exception();
356     return r;
357   }
358 
359   llvm::Expected<bool> IsInstance(const PythonObject &cls) {
360     if (!m_py_obj || !cls.IsValid())
361       return nullDeref();
362     int r = PyObject_IsInstance(m_py_obj, cls.get());
363     if (r < 0)
364       return exception();
365     return !!r;
366   }
367 
368 protected:
369   PyObject *m_py_obj;
370 };
371 
372 namespace python {
373 
374 // This is why C++ needs monads.
375 template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) {
376   if (!obj)
377     return obj.takeError();
378   if (!T::Check(obj.get().get()))
379     return llvm::createStringError(llvm::inconvertibleErrorCode(),
380                                    "type error");
381   return T(PyRefType::Borrowed, std::move(obj.get().get()));
382 }
383 
384 template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj);
385 
386 template <>
387 llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj);
388 
389 } // namespace python
390 
391 template <class T> class TypedPythonObject : public PythonObject {
392 public:
393   // override to perform implicit type conversions on Reset
394   // This can be eliminated once we drop python 2 support.
395   static void Convert(PyRefType &type, PyObject *&py_obj) {}
396 
397   using PythonObject::Reset;
398 
399   void Reset(PyRefType type, PyObject *py_obj) {
400     Reset();
401     if (!py_obj)
402       return;
403     T::Convert(type, py_obj);
404     if (T::Check(py_obj))
405       PythonObject::Reset(type, py_obj);
406     else if (type == PyRefType::Owned)
407       Py_DECREF(py_obj);
408   }
409 
410   TypedPythonObject(PyRefType type, PyObject *py_obj) { Reset(type, py_obj); }
411 
412   TypedPythonObject() {}
413 };
414 
415 class PythonBytes : public TypedPythonObject<PythonBytes> {
416 public:
417   using TypedPythonObject::TypedPythonObject;
418   explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes);
419   PythonBytes(const uint8_t *bytes, size_t length);
420 
421   static bool Check(PyObject *py_obj);
422 
423   llvm::ArrayRef<uint8_t> GetBytes() const;
424 
425   size_t GetSize() const;
426 
427   void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);
428 
429   StructuredData::StringSP CreateStructuredString() const;
430 };
431 
432 class PythonByteArray : public TypedPythonObject<PythonByteArray> {
433 public:
434   using TypedPythonObject::TypedPythonObject;
435   explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes);
436   PythonByteArray(const uint8_t *bytes, size_t length);
437   PythonByteArray(const PythonBytes &object);
438 
439   static bool Check(PyObject *py_obj);
440 
441   llvm::ArrayRef<uint8_t> GetBytes() const;
442 
443   size_t GetSize() const;
444 
445   void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);
446 
447   StructuredData::StringSP CreateStructuredString() const;
448 };
449 
450 class PythonString : public TypedPythonObject<PythonString> {
451 public:
452   using TypedPythonObject::TypedPythonObject;
453   static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string);
454 
455   PythonString() : TypedPythonObject() {} // MSVC requires this for some reason
456 
457   explicit PythonString(llvm::StringRef string); // safe, null on error
458 
459   static bool Check(PyObject *py_obj);
460   static void Convert(PyRefType &type, PyObject *&py_obj);
461 
462   llvm::StringRef GetString() const; // safe, empty string on error
463 
464   llvm::Expected<llvm::StringRef> AsUTF8() const;
465 
466   size_t GetSize() const;
467 
468   void SetString(llvm::StringRef string); // safe, null on error
469 
470   StructuredData::StringSP CreateStructuredString() const;
471 };
472 
473 class PythonInteger : public TypedPythonObject<PythonInteger> {
474 public:
475   using TypedPythonObject::TypedPythonObject;
476 
477   PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason
478 
479   explicit PythonInteger(int64_t value);
480 
481   static bool Check(PyObject *py_obj);
482   static void Convert(PyRefType &type, PyObject *&py_obj);
483 
484   int64_t GetInteger() const;
485 
486   void SetInteger(int64_t value);
487 
488   StructuredData::IntegerSP CreateStructuredInteger() const;
489 };
490 
491 class PythonBoolean : public TypedPythonObject<PythonBoolean> {
492 public:
493   using TypedPythonObject::TypedPythonObject;
494 
495   explicit PythonBoolean(bool value);
496 
497   static bool Check(PyObject *py_obj);
498 
499   bool GetValue() const;
500 
501   void SetValue(bool value);
502 
503   StructuredData::BooleanSP CreateStructuredBoolean() const;
504 };
505 
506 class PythonList : public TypedPythonObject<PythonList> {
507 public:
508   using TypedPythonObject::TypedPythonObject;
509 
510   PythonList() : TypedPythonObject() {} // MSVC requires this for some reason
511 
512   explicit PythonList(PyInitialValue value);
513   explicit PythonList(int list_size);
514 
515   static bool Check(PyObject *py_obj);
516 
517   uint32_t GetSize() const;
518 
519   PythonObject GetItemAtIndex(uint32_t index) const;
520 
521   void SetItemAtIndex(uint32_t index, const PythonObject &object);
522 
523   void AppendItem(const PythonObject &object);
524 
525   StructuredData::ArraySP CreateStructuredArray() const;
526 };
527 
528 class PythonTuple : public TypedPythonObject<PythonTuple> {
529 public:
530   using TypedPythonObject::TypedPythonObject;
531 
532   explicit PythonTuple(PyInitialValue value);
533   explicit PythonTuple(int tuple_size);
534   PythonTuple(std::initializer_list<PythonObject> objects);
535   PythonTuple(std::initializer_list<PyObject *> objects);
536 
537   static bool Check(PyObject *py_obj);
538 
539   uint32_t GetSize() const;
540 
541   PythonObject GetItemAtIndex(uint32_t index) const;
542 
543   void SetItemAtIndex(uint32_t index, const PythonObject &object);
544 
545   StructuredData::ArraySP CreateStructuredArray() const;
546 };
547 
548 class PythonDictionary : public TypedPythonObject<PythonDictionary> {
549 public:
550   using TypedPythonObject::TypedPythonObject;
551 
552   PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason
553 
554   explicit PythonDictionary(PyInitialValue value);
555 
556   static bool Check(PyObject *py_obj);
557 
558   uint32_t GetSize() const;
559 
560   PythonList GetKeys() const;
561 
562   PythonObject GetItemForKey(const PythonObject &key) const;
563   void SetItemForKey(const PythonObject &key, const PythonObject &value);
564 
565   StructuredData::DictionarySP CreateStructuredDictionary() const;
566 };
567 
568 class PythonModule : public TypedPythonObject<PythonModule> {
569 public:
570   using TypedPythonObject::TypedPythonObject;
571 
572   static bool Check(PyObject *py_obj);
573 
574   static PythonModule BuiltinsModule();
575 
576   static PythonModule MainModule();
577 
578   static PythonModule AddModule(llvm::StringRef module);
579 
580   // safe, returns invalid on error;
581   static PythonModule ImportModule(llvm::StringRef name) {
582     std::string s = name;
583     auto mod = Import(s.c_str());
584     if (!mod) {
585       llvm::consumeError(mod.takeError());
586       return PythonModule();
587     }
588     return std::move(mod.get());
589   }
590 
591   static llvm::Expected<PythonModule> Import(const char *name);
592 
593   llvm::Expected<PythonObject> Get(const char *name);
594 
595   PythonDictionary GetDictionary() const;
596 };
597 
598 class PythonCallable : public TypedPythonObject<PythonCallable> {
599 public:
600   using TypedPythonObject::TypedPythonObject;
601 
602   struct ArgInfo {
603     size_t count;
604     bool is_bound_method : 1;
605     bool has_varargs : 1;
606     bool has_kwargs : 1;
607   };
608 
609   static bool Check(PyObject *py_obj);
610 
611   ArgInfo GetNumArguments() const;
612 
613   // If the callable is a Py_Class, then find the number of arguments
614   // of the __init__ method.
615   ArgInfo GetNumInitArguments() const;
616 
617   PythonObject operator()();
618 
619   PythonObject operator()(std::initializer_list<PyObject *> args);
620 
621   PythonObject operator()(std::initializer_list<PythonObject> args);
622 
623   template <typename Arg, typename... Args>
624   PythonObject operator()(const Arg &arg, Args... args) {
625     return operator()({arg, args...});
626   }
627 };
628 
629 class PythonFile : public TypedPythonObject<PythonFile> {
630 public:
631   using TypedPythonObject::TypedPythonObject;
632 
633   PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason
634 
635   static bool Check(PyObject *py_obj);
636 
637   static llvm::Expected<PythonFile> FromFile(File &file,
638                                              const char *mode = nullptr);
639 
640   llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false);
641   llvm::Expected<lldb::FileSP>
642   ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false);
643 };
644 
645 class PythonException : public llvm::ErrorInfo<PythonException> {
646 private:
647   PyObject *m_exception_type, *m_exception, *m_traceback;
648   PyObject *m_repr_bytes;
649 
650 public:
651   static char ID;
652   const char *toCString() const;
653   PythonException(const char *caller = nullptr);
654   void Restore();
655   ~PythonException();
656   void log(llvm::raw_ostream &OS) const override;
657   std::error_code convertToErrorCode() const override;
658 };
659 
660 // This extracts the underlying T out of an Expected<T> and returns it.
661 // If the Expected is an Error instead of a T, that error will be converted
662 // into a python exception, and this will return a default-constructed T.
663 //
664 // This is appropriate for use right at the boundary of python calling into
665 // C++, such as in a SWIG typemap.   In such a context you should simply
666 // check if the returned T is valid, and if it is, return a NULL back
667 // to python.   This will result in the Error being raised as an exception
668 // from python code's point of view.
669 //
670 // For example:
671 // ```
672 // Expected<Foo *> efoop = some_cpp_function();
673 // Foo *foop = unwrapOrSetPythonException(efoop);
674 // if (!foop)
675 //    return NULL;
676 // do_something(*foop);
677 //
678 // If the Error returned was itself created because a python exception was
679 // raised when C++ code called into python, then the original exception
680 // will be restored.   Otherwise a simple string exception will be raised.
681 template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) {
682   if (expected)
683     return expected.get();
684   llvm::handleAllErrors(
685       expected.takeError(), [](PythonException &E) { E.Restore(); },
686       [](const llvm::ErrorInfoBase &E) {
687         PyErr_SetString(PyExc_Exception, E.message().c_str());
688       });
689   return T();
690 }
691 
692 } // namespace lldb_private
693 
694 #endif
695 
696 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
697