xref: /llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h (revision 2386537c2469a97501a305c6b3138231b907a67f)
1 //===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 //
10 // !! FIXME FIXME FIXME !!
11 //
12 // Python APIs nearly all can return an exception.   They do this
13 // by returning NULL, or -1, or some such value and setting
14 // the exception state with PyErr_Set*().   Exceptions must be
15 // handled before further python API functions are called.   Failure
16 // to do so will result in asserts on debug builds of python.
17 // It will also sometimes, but not usually result in crashes of
18 // release builds.
19 //
20 // Nearly all the code in this header does not handle python exceptions
21 // correctly.  It should all be converted to return Expected<> or
22 // Error types to capture the exception.
23 //
24 // Everything in this file except functions that return Error or
25 // Expected<> is considered deprecated and should not be
26 // used in new code.  If you need to use it, fix it first.
27 //
28 //
29 // TODOs for this file
30 //
31 // * Make all methods safe for exceptions.
32 //
33 // * Eliminate method signatures that must translate exceptions into
34 //   empty objects or NULLs.   Almost everything here should return
35 //   Expected<>.   It should be acceptable for certain operations that
36 //   can never fail to assert instead, such as the creation of
37 //   PythonString from a string literal.
38 //
// * Eliminate Reset(), and make all non-default constructors private.
40 //   Python objects should be created with Retain<> or Take<>, and they
41 //   should be assigned with operator=
42 //
43 // * Eliminate default constructors, make python objects always
44 //   nonnull, and use optionals where necessary.
45 //
46 
47 
48 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
49 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
50 
51 #ifndef LLDB_DISABLE_PYTHON
52 
53 // LLDB Python header must be included first
54 #include "lldb-python.h"
55 
56 #include "lldb/Host/File.h"
57 #include "lldb/Utility/StructuredData.h"
58 
59 #include "llvm/ADT/ArrayRef.h"
60 
61 namespace lldb_private {
62 
63 class PythonObject;
64 class PythonBytes;
65 class PythonString;
66 class PythonList;
67 class PythonDictionary;
68 class PythonInteger;
69 class PythonException;
70 
71 class StructuredPythonObject : public StructuredData::Generic {
72 public:
73   StructuredPythonObject() : StructuredData::Generic() {}
74 
75   StructuredPythonObject(void *obj) : StructuredData::Generic(obj) {
76     Py_XINCREF(GetValue());
77   }
78 
79   ~StructuredPythonObject() override {
80     if (Py_IsInitialized())
81       Py_XDECREF(GetValue());
82     SetValue(nullptr);
83   }
84 
85   bool IsValid() const override { return GetValue() && GetValue() != Py_None; }
86 
87   void Serialize(llvm::json::OStream &s) const override;
88 
89 private:
90   DISALLOW_COPY_AND_ASSIGN(StructuredPythonObject);
91 };
92 
// Coarse classification of a Python object.  This is the type returned by
// PythonObject::GetObjectType(); how an object is mapped to an enumerator is
// decided by that function's definition, which is not part of this header.
enum class PyObjectType {
  Unknown,
  None,
  Boolean,
  Integer,
  Dictionary,
  List,
  String,
  Bytes,
  ByteArray,
  Module,
  Callable,
  Tuple,
  File
};
108 
// Ownership of a raw PyObject* that is being handed to a wrapper.
// PythonObject::Reset(PyRefType, PyObject *) increments the reference count
// for Borrowed and leaves it untouched for Owned.
enum class PyRefType {
  Borrowed, // We are not given ownership of the incoming PyObject.
            // We cannot safely hold it without calling Py_INCREF.
  Owned     // We have ownership of the incoming PyObject.  We should
            // not call Py_INCREF.
};
115 
116 namespace python {
117 
118 // Take a reference that you already own, and turn it into
119 // a PythonObject.
120 //
121 // Most python API methods will return a +1 reference
122 // if they succeed or NULL if and only if
123 // they set an exception.   Use this to collect such return
124 // values, after checking for NULL.
125 //
126 // If T is not just PythonObject, then obj must be already be
127 // checked to be of the correct type.
128 template <typename T> T Take(PyObject *obj) {
129   assert(obj);
130   assert(!PyErr_Occurred());
131   T thing(PyRefType::Owned, obj);
132   assert(thing.IsValid());
133   return std::move(thing);
134 }
135 
136 // Retain a reference you have borrowed, and turn it into
137 // a PythonObject.
138 //
139 // A minority of python APIs return a borrowed reference
140 // instead of a +1.   They will also return NULL if and only
141 // if they set an exception.   Use this to collect such return
142 // values, after checking for NULL.
143 //
144 // If T is not just PythonObject, then obj must be already be
145 // checked to be of the correct type.
146 template <typename T> T Retain(PyObject *obj) {
147   assert(obj);
148   assert(!PyErr_Occurred());
149   T thing(PyRefType::Borrowed, obj);
150   assert(thing.IsValid());
151   return std::move(thing);
152 }
153 
154 } // namespace python
155 
// Tag accepted by the explicit PythonList, PythonTuple and PythonDictionary
// constructors.
// NOTE(review): presumably Empty creates a fresh empty container and Invalid
// leaves the wrapper without an object -- confirm against the constructor
// definitions, which are not part of this header.
enum class PyInitialValue { Invalid, Empty };
157 
// Maps a C++ argument type to the format character and the argument value
// used when PythonObject::CallMethod and PythonObject::Call build their
// format strings.  Only the specializations are defined.
template <typename T, typename Enable = void> struct PythonFormat;

// unsigned long long: format character 'K', value passed through unchanged.
template <> struct PythonFormat<unsigned long long> {
  static constexpr char format = 'K';
  static unsigned long long get(unsigned long long value) { return value; }
};

// long long: format character 'L', value passed through unchanged.
template <> struct PythonFormat<long long> {
  static constexpr char format = 'L';
  static long long get(long long value) { return value; }
};
169 
170 template <typename T>
171 struct PythonFormat<
172     T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> {
173   static constexpr char format = 'O';
174   static auto get(const T &value) { return value.get(); }
175 };
176 
177 class PythonObject {
178 public:
179   PythonObject() : m_py_obj(nullptr) {}
180 
181   PythonObject(PyRefType type, PyObject *py_obj) : m_py_obj(nullptr) {
182     Reset(type, py_obj);
183   }
184 
185   PythonObject(const PythonObject &rhs)
186       : PythonObject(PyRefType::Borrowed, rhs.m_py_obj) {}
187 
188   PythonObject(PythonObject &&rhs) {
189     m_py_obj = rhs.m_py_obj;
190     rhs.m_py_obj = nullptr;
191   }
192 
193   ~PythonObject() { Reset(); }
194 
195   void Reset() {
196     if (m_py_obj && Py_IsInitialized())
197       Py_DECREF(m_py_obj);
198     m_py_obj = nullptr;
199   }
200 
201   void Reset(PyRefType type, PyObject *py_obj) {
202     if (py_obj == m_py_obj)
203       return;
204 
205     if (Py_IsInitialized())
206       Py_XDECREF(m_py_obj);
207 
208     m_py_obj = py_obj;
209 
210     // If this is a borrowed reference, we need to convert it to
211     // an owned reference by incrementing it.  If it is an owned
212     // reference (for example the caller allocated it with PyDict_New()
213     // then we must *not* increment it.
214     if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed)
215       Py_XINCREF(m_py_obj);
216   }
217 
218   void Dump() const {
219     if (m_py_obj)
220       _PyObject_Dump(m_py_obj);
221     else
222       puts("NULL");
223   }
224 
225   void Dump(Stream &strm) const;
226 
227   PyObject *get() const { return m_py_obj; }
228 
229   PyObject *release() {
230     PyObject *result = m_py_obj;
231     m_py_obj = nullptr;
232     return result;
233   }
234 
235   PythonObject &operator=(PythonObject other) {
236     Reset();
237     m_py_obj = std::exchange(other.m_py_obj, nullptr);
238     return *this;
239   }
240 
241   PyObjectType GetObjectType() const;
242 
243   PythonString Repr() const;
244 
245   PythonString Str() const;
246 
247   static PythonObject ResolveNameWithDictionary(llvm::StringRef name,
248                                                 const PythonDictionary &dict);
249 
250   template <typename T>
251   static T ResolveNameWithDictionary(llvm::StringRef name,
252                                      const PythonDictionary &dict) {
253     return ResolveNameWithDictionary(name, dict).AsType<T>();
254   }
255 
256   PythonObject ResolveName(llvm::StringRef name) const;
257 
258   template <typename T> T ResolveName(llvm::StringRef name) const {
259     return ResolveName(name).AsType<T>();
260   }
261 
262   bool HasAttribute(llvm::StringRef attribute) const;
263 
264   PythonObject GetAttributeValue(llvm::StringRef attribute) const;
265 
266   bool IsNone() const { return m_py_obj == Py_None; }
267 
268   bool IsValid() const { return m_py_obj != nullptr; }
269 
270   bool IsAllocated() const { return IsValid() && !IsNone(); }
271 
272   explicit operator bool() const { return IsValid() && !IsNone(); }
273 
274   template <typename T> T AsType() const {
275     if (!T::Check(m_py_obj))
276       return T();
277     return T(PyRefType::Borrowed, m_py_obj);
278   }
279 
280   StructuredData::ObjectSP CreateStructuredObject() const;
281 
282 protected:
283   static llvm::Error nullDeref() {
284     return llvm::createStringError(llvm::inconvertibleErrorCode(),
285                                    "A NULL PyObject* was dereferenced");
286   }
287   static llvm::Error exception(const char *s = nullptr) {
288     return llvm::make_error<PythonException>(s);
289   }
290   static llvm::Error keyError() {
291     return llvm::createStringError(llvm::inconvertibleErrorCode(),
292                                    "key not in dict");
293   }
294 
295 #if PY_MAJOR_VERSION < 3
296   // The python 2 API declares some arguments as char* that should
297   // be const char *, but it doesn't actually modify them.
298   static char *py2_const_cast(const char *s) { return const_cast<char *>(s); }
299 #else
300   static const char *py2_const_cast(const char *s) { return s; }
301 #endif
302 
303 public:
304   template <typename... T>
305   llvm::Expected<PythonObject> CallMethod(const char *name,
306                                           const T &... t) const {
307     const char format[] = {'(', PythonFormat<T>::format..., ')', 0};
308     PyObject *obj =
309         PyObject_CallMethod(m_py_obj, py2_const_cast(name),
310                             py2_const_cast(format), PythonFormat<T>::get(t)...);
311     if (!obj)
312       return exception();
313     return python::Take<PythonObject>(obj);
314   }
315 
316   template <typename... T>
317   llvm::Expected<PythonObject> Call(const T &... t) const {
318     const char format[] = {'(', PythonFormat<T>::format..., ')', 0};
319     PyObject *obj = PyObject_CallFunction(m_py_obj, py2_const_cast(format),
320                                           PythonFormat<T>::get(t)...);
321     if (!obj)
322       return exception();
323     return python::Take<PythonObject>(obj);
324   }
325 
326   llvm::Expected<PythonObject> GetAttribute(const char *name) const {
327     if (!m_py_obj)
328       return nullDeref();
329     PyObject *obj = PyObject_GetAttrString(m_py_obj, name);
330     if (!obj)
331       return exception();
332     return python::Take<PythonObject>(obj);
333   }
334 
335   llvm::Expected<bool> IsTrue() {
336     if (!m_py_obj)
337       return nullDeref();
338     int r = PyObject_IsTrue(m_py_obj);
339     if (r < 0)
340       return exception();
341     return !!r;
342   }
343 
344   llvm::Expected<long long> AsLongLong() {
345     if (!m_py_obj)
346       return nullDeref();
347     assert(!PyErr_Occurred());
348     long long r = PyLong_AsLongLong(m_py_obj);
349     if (PyErr_Occurred())
350       return exception();
351     return r;
352   }
353 
354   llvm::Expected<bool> IsInstance(const PythonObject &cls) {
355     if (!m_py_obj || !cls.IsValid())
356       return nullDeref();
357     int r = PyObject_IsInstance(m_py_obj, cls.get());
358     if (r < 0)
359       return exception();
360     return !!r;
361   }
362 
363 protected:
364   PyObject *m_py_obj;
365 };
366 
367 namespace python {
368 
369 // This is why C++ needs monads.
370 template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) {
371   if (!obj)
372     return obj.takeError();
373   if (!T::Check(obj.get().get()))
374     return llvm::createStringError(llvm::inconvertibleErrorCode(),
375                                    "type error");
376   return T(PyRefType::Borrowed, std::move(obj.get().get()));
377 }
378 
// Explicit specializations of As<> for result types that are not Python
// wrapper classes.  Only the declarations appear in this header.
template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<std::string> As<std::string>(llvm::Expected<PythonObject> &&obj);
386 
387 } // namespace python
388 
389 template <class T> class TypedPythonObject : public PythonObject {
390 public:
391   // override to perform implicit type conversions on Reset
392   // This can be eliminated once we drop python 2 support.
393   static void Convert(PyRefType &type, PyObject *&py_obj) {}
394 
395   using PythonObject::Reset;
396 
397   void Reset(PyRefType type, PyObject *py_obj) {
398     Reset();
399     if (!py_obj)
400       return;
401     T::Convert(type, py_obj);
402     if (T::Check(py_obj))
403       PythonObject::Reset(type, py_obj);
404     else if (type == PyRefType::Owned)
405       Py_DECREF(py_obj);
406   }
407 
408   TypedPythonObject(PyRefType type, PyObject *py_obj) { Reset(type, py_obj); }
409 
410   TypedPythonObject() {}
411 };
412 
// Typed wrapper that only accepts objects passing PythonBytes::Check.
// All members are declared here only; their definitions are not part of
// this header.
class PythonBytes : public TypedPythonObject<PythonBytes> {
public:
  using TypedPythonObject::TypedPythonObject;
  explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes);
  PythonBytes(const uint8_t *bytes, size_t length);

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};
429 
// Typed wrapper that only accepts objects passing PythonByteArray::Check.
// All members are declared here only; their definitions are not part of
// this header.
class PythonByteArray : public TypedPythonObject<PythonByteArray> {
public:
  using TypedPythonObject::TypedPythonObject;
  explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes);
  PythonByteArray(const uint8_t *bytes, size_t length);
  // NOTE(review): non-explicit, so a PythonBytes converts implicitly to a
  // PythonByteArray -- confirm that this is intended.
  PythonByteArray(const PythonBytes &object);

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};
447 
// Typed wrapper that only accepts objects passing PythonString::Check.
// It supplies its own Convert hook, which TypedPythonObject::Reset calls
// before Check.  Two API styles are offered: FromUTF8/AsUTF8 report failure
// through llvm::Expected, while the constructor, GetString and SetString are
// annotated below as falling back to a null or empty result.
class PythonString : public TypedPythonObject<PythonString> {
public:
  using TypedPythonObject::TypedPythonObject;
  static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string);

  PythonString() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonString(llvm::StringRef string); // safe, null on error

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);
  // Hides TypedPythonObject::Convert; the definition is not in this header.
  static void Convert(PyRefType &type, PyObject *&py_obj);

  llvm::StringRef GetString() const; // safe, empty string on error

  llvm::Expected<llvm::StringRef> AsUTF8() const;

  size_t GetSize() const;

  void SetString(llvm::StringRef string); // safe, null on error

  StructuredData::StringSP CreateStructuredString() const;
};
470 
// Typed wrapper that only accepts objects passing PythonInteger::Check.
// It supplies its own Convert hook, which TypedPythonObject::Reset calls
// before Check.  Member definitions are not part of this header.
class PythonInteger : public TypedPythonObject<PythonInteger> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonInteger(int64_t value);

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);
  // Hides TypedPythonObject::Convert; the definition is not in this header.
  static void Convert(PyRefType &type, PyObject *&py_obj);

  // NOTE(review): no error channel in this signature; how a failed
  // conversion is reported cannot be told from the declaration.
  int64_t GetInteger() const;

  void SetInteger(int64_t value);

  StructuredData::IntegerSP CreateStructuredInteger() const;
};
488 
// Typed wrapper that only accepts objects passing PythonBoolean::Check.
// Member definitions are not part of this header.
class PythonBoolean : public TypedPythonObject<PythonBoolean> {
public:
  using TypedPythonObject::TypedPythonObject;

  explicit PythonBoolean(bool value);

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);

  bool GetValue() const;

  void SetValue(bool value);

  StructuredData::BooleanSP CreateStructuredBoolean() const;
};
503 
// Typed wrapper that only accepts objects passing PythonList::Check.
// Member definitions are not part of this header.
class PythonList : public TypedPythonObject<PythonList> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonList() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonList(PyInitialValue value);
  explicit PythonList(int list_size);

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);

  uint32_t GetSize() const;

  // NOTE(review): the element accessors return plain values or void, so any
  // Python exception has no error channel here (see the FIXME at the top of
  // the file).
  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  void AppendItem(const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};
525 
// Typed wrapper that only accepts objects passing PythonTuple::Check.
// Member definitions are not part of this header.
class PythonTuple : public TypedPythonObject<PythonTuple> {
public:
  using TypedPythonObject::TypedPythonObject;

  explicit PythonTuple(PyInitialValue value);
  explicit PythonTuple(int tuple_size);
  // Construct from a braced list of wrappers or of raw pointers.
  // NOTE(review): how references to raw PyObject* elements are handled is
  // decided by the definition -- confirm before passing +1 references.
  PythonTuple(std::initializer_list<PythonObject> objects);
  PythonTuple(std::initializer_list<PyObject *> objects);

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);

  uint32_t GetSize() const;

  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};
545 
// Typed wrapper that only accepts objects passing PythonDictionary::Check.
// Item access comes in two flavours: GetItemForKey/SetItemForKey are marked
// DEPRECATED and have no error channel, while GetItem/SetItem report failure
// through llvm::Expected / llvm::Error.  Member definitions are not part of
// this header.
class PythonDictionary : public TypedPythonObject<PythonDictionary> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonDictionary(PyInitialValue value);

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);

  uint32_t GetSize() const;

  PythonList GetKeys() const;

  PythonObject GetItemForKey(const PythonObject &key) const; // DEPRECATED
  void SetItemForKey(const PythonObject &key,
                     const PythonObject &value); // DEPRECATED

  // Preferred accessors.  SetItem is const-qualified: it changes the Python
  // dictionary, not the wrapper object itself.
  llvm::Expected<PythonObject> GetItem(const PythonObject &key) const;
  llvm::Expected<PythonObject> GetItem(const char *key) const;
  llvm::Error SetItem(const PythonObject &key, const PythonObject &value) const;
  llvm::Error SetItem(const char *key, const PythonObject &value) const;

  StructuredData::DictionarySP CreateStructuredDictionary() const;
};
571 
572 class PythonModule : public TypedPythonObject<PythonModule> {
573 public:
574   using TypedPythonObject::TypedPythonObject;
575 
576   static bool Check(PyObject *py_obj);
577 
578   static PythonModule BuiltinsModule();
579 
580   static PythonModule MainModule();
581 
582   static PythonModule AddModule(llvm::StringRef module);
583 
584   // safe, returns invalid on error;
585   static PythonModule ImportModule(llvm::StringRef name) {
586     std::string s = name;
587     auto mod = Import(s.c_str());
588     if (!mod) {
589       llvm::consumeError(mod.takeError());
590       return PythonModule();
591     }
592     return std::move(mod.get());
593   }
594 
595   static llvm::Expected<PythonModule> Import(const char *name);
596 
597   llvm::Expected<PythonObject> Get(const char *name);
598 
599   PythonDictionary GetDictionary() const;
600 };
601 
602 class PythonCallable : public TypedPythonObject<PythonCallable> {
603 public:
604   using TypedPythonObject::TypedPythonObject;
605 
606   struct ArgInfo {
607     /* the largest number of positional arguments this callable
608      * can accept, or UNBOUNDED, ie UINT_MAX if it's a varargs
609      * function and can accept an arbitrary number */
610     unsigned max_positional_args;
611     static constexpr unsigned UNBOUNDED = UINT_MAX; // FIXME c++17 inline
612     /* the number of positional arguments, including optional ones,
613      * and excluding varargs.  If this is a bound method, then the
614      * count will still include a +1 for self.
615      *
616      * FIXME. That's crazy.  This should be replaced with
617      * an accurate min and max for positional args.
618      */
619     int count;
620     /* does the callable have positional varargs? */
621     bool has_varargs : 1; // FIXME delete this
622   };
623 
624   static bool Check(PyObject *py_obj);
625 
626   llvm::Expected<ArgInfo> GetArgInfo() const;
627 
628   llvm::Expected<ArgInfo> GetInitArgInfo() const;
629 
630   ArgInfo GetNumArguments() const; // DEPRECATED
631 
632   // If the callable is a Py_Class, then find the number of arguments
633   // of the __init__ method.
634   ArgInfo GetNumInitArguments() const; // DEPRECATED
635 
636   PythonObject operator()();
637 
638   PythonObject operator()(std::initializer_list<PyObject *> args);
639 
640   PythonObject operator()(std::initializer_list<PythonObject> args);
641 
642   template <typename Arg, typename... Args>
643   PythonObject operator()(const Arg &arg, Args... args) {
644     return operator()({arg, args...});
645   }
646 };
647 
// Typed wrapper that only accepts objects passing PythonFile::Check.
// Conversions in both directions (File -> PythonFile and PythonFile ->
// lldb::FileSP) report failure through llvm::Expected.  Member definitions
// are not part of this header.
class PythonFile : public TypedPythonObject<PythonFile> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason

  // Type predicate consulted by TypedPythonObject::Reset and
  // PythonObject::AsType.
  static bool Check(PyObject *py_obj);

  // NOTE(review): the effect of leaving mode as nullptr is decided by the
  // definition -- presumably the mode is derived from the File; confirm.
  static llvm::Expected<PythonFile> FromFile(File &file,
                                             const char *mode = nullptr);

  // NOTE(review): `borrowed` presumably controls whether the resulting File
  // closes the underlying Python file -- confirm against the definition.
  llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false);
  llvm::Expected<lldb::FileSP>
  ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false);
};
663 
// llvm::Error payload type used to carry a Python exception through
// llvm::Error / llvm::Expected.  PythonObject::exception() creates one, and
// unwrapOrSetPythonException() calls Restore() on it.  Member definitions
// are not part of this header.
class PythonException : public llvm::ErrorInfo<PythonException> {
private:
  // NOTE(review): presumably the fetched (type, value, traceback) triple plus
  // a cached repr used by toCString()/log() -- confirm in the definition.
  PyObject *m_exception_type, *m_exception, *m_traceback;
  PyObject *m_repr_bytes;

public:
  // Identity tag of the kind llvm::ErrorInfo subclasses provide.
  static char ID;
  const char *toCString() const;
  // `caller` is what PythonObject::exception() forwards as its argument.
  PythonException(const char *caller = nullptr);
  void Restore();
  ~PythonException();
  void log(llvm::raw_ostream &OS) const override;
  std::error_code convertToErrorCode() const override;
};
678 
679 // This extracts the underlying T out of an Expected<T> and returns it.
680 // If the Expected is an Error instead of a T, that error will be converted
681 // into a python exception, and this will return a default-constructed T.
682 //
683 // This is appropriate for use right at the boundary of python calling into
684 // C++, such as in a SWIG typemap.   In such a context you should simply
685 // check if the returned T is valid, and if it is, return a NULL back
686 // to python.   This will result in the Error being raised as an exception
687 // from python code's point of view.
688 //
689 // For example:
690 // ```
691 // Expected<Foo *> efoop = some_cpp_function();
692 // Foo *foop = unwrapOrSetPythonException(efoop);
693 // if (!foop)
694 //    return NULL;
695 // do_something(*foop);
696 //
697 // If the Error returned was itself created because a python exception was
698 // raised when C++ code called into python, then the original exception
699 // will be restored.   Otherwise a simple string exception will be raised.
700 template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) {
701   if (expected)
702     return expected.get();
703   llvm::handleAllErrors(
704       expected.takeError(), [](PythonException &E) { E.Restore(); },
705       [](const llvm::ErrorInfoBase &E) {
706         PyErr_SetString(PyExc_Exception, E.message().c_str());
707       });
708   return T();
709 }
710 
711 } // namespace lldb_private
712 
713 #endif
714 
715 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
716