xref: /llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h (revision 085328eeeeba63ab3ef2d59f1bb8fa83ca069d33)
1 //===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 //
10 // !! FIXME FIXME FIXME !!
11 //
12 // Python APIs nearly all can return an exception.   They do this
13 // by returning NULL, or -1, or some such value and setting
14 // the exception state with PyErr_Set*().   Exceptions must be
15 // handled before further python API functions are called.   Failure
16 // to do so will result in asserts on debug builds of python.
17 // It will also sometimes, but not usually result in crashes of
18 // release builds.
19 //
20 // Nearly all the code in this header does not handle python exceptions
21 // correctly.  It should all be converted to return Expected<> or
22 // Error types to capture the exception.
23 //
24 // Everything in this file except functions that return Error or
25 // Expected<> is considered deprecated and should not be
26 // used in new code.  If you need to use it, fix it first.
27 //
28 
29 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
30 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
31 
32 #ifndef LLDB_DISABLE_PYTHON
33 
34 // LLDB Python header must be included first
35 #include "lldb-python.h"
36 
37 #include "lldb/Host/File.h"
38 #include "lldb/Utility/StructuredData.h"
39 
40 #include "llvm/ADT/ArrayRef.h"
41 
42 namespace lldb_private {
43 
44 class PythonObject;
45 class PythonBytes;
46 class PythonString;
47 class PythonList;
48 class PythonDictionary;
49 class PythonInteger;
50 class PythonException;
51 
52 class StructuredPythonObject : public StructuredData::Generic {
53 public:
54   StructuredPythonObject() : StructuredData::Generic() {}
55 
56   StructuredPythonObject(void *obj) : StructuredData::Generic(obj) {
57     Py_XINCREF(GetValue());
58   }
59 
60   ~StructuredPythonObject() override {
61     if (Py_IsInitialized())
62       Py_XDECREF(GetValue());
63     SetValue(nullptr);
64   }
65 
66   bool IsValid() const override { return GetValue() && GetValue() != Py_None; }
67 
68   void Serialize(llvm::json::OStream &s) const override;
69 
70 private:
71   DISALLOW_COPY_AND_ASSIGN(StructuredPythonObject);
72 };
73 
// Coarse classification of a wrapped Python object, as reported by
// PythonObject::GetObjectType().  Apart from Unknown and None, each
// enumerator shares its name with one of the Python* wrapper classes declared
// in this header.
enum class PyObjectType {
  Unknown,
  None,
  Boolean,
  Integer,
  Dictionary,
  List,
  String,
  Bytes,
  ByteArray,
  Module,
  Callable,
  Tuple,
  File
};
89 
// Describes the ownership of a raw PyObject* being handed to a wrapper, which
// determines whether the wrapper must take its own reference.
enum class PyRefType {
  Borrowed, // We are not given ownership of the incoming PyObject.
            // We cannot safely hold it without calling Py_INCREF.
  Owned     // We have ownership of the incoming PyObject.  We should
            // not call Py_INCREF.
};
96 
97 namespace python {
98 
99 // Take a reference that you already own, and turn it into
100 // a PythonObject.
101 //
102 // Most python API methods will return a +1 reference
103 // if they succeed or NULL if and only if
104 // they set an exception.   Use this to collect such return
105 // values, after checking for NULL.
106 //
107 // If T is not just PythonObject, then obj must be already be
108 // checked to be of the correct type.
109 template <typename T> T Take(PyObject *obj) {
110   assert(obj);
111   assert(!PyErr_Occurred());
112   T thing(PyRefType::Owned, obj);
113   assert(thing.IsValid());
114   return std::move(thing);
115 }
116 
117 // Retain a reference you have borrowed, and turn it into
118 // a PythonObject.
119 //
120 // A minority of python APIs return a borrowed reference
121 // instead of a +1.   They will also return NULL if and only
122 // if they set an exception.   Use this to collect such return
123 // values, after checking for NULL.
124 //
125 // If T is not just PythonObject, then obj must be already be
126 // checked to be of the correct type.
127 template <typename T> T Retain(PyObject *obj) {
128   assert(obj);
129   assert(!PyErr_Occurred());
130   T thing(PyRefType::Borrowed, obj);
131   assert(thing.IsValid());
132   return std::move(thing);
133 }
134 
135 } // namespace python
136 
// Selector accepted by the explicit PythonList, PythonTuple and
// PythonDictionary constructors.
// NOTE(review): presumably Empty creates a fresh empty container and Invalid
// leaves the wrapper null -- confirm against the out-of-line constructors.
enum class PyInitialValue { Invalid, Empty };
138 
// Maps a C++ argument type to the format character and the value that are
// passed to the variadic Python call API (see PythonObject::CallMethod).
// Only specializations are defined; using an unsupported type fails to
// compile.  The second parameter exists solely for SFINAE-based
// specializations.
template <typename T, typename Enable = void> struct PythonFormat;

// unsigned long long is passed through unchanged with format 'K'.
template <> struct PythonFormat<unsigned long long> {
  static constexpr char format = 'K';
  static unsigned long long get(unsigned long long value) { return value; }
};

// long long is passed through unchanged with format 'L'.
template <> struct PythonFormat<long long> {
  static constexpr char format = 'L';
  static long long get(long long value) { return value; }
};
150 
151 template <typename T>
152 struct PythonFormat<
153     T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> {
154   static constexpr char format = 'O';
155   static auto get(const T &value) { return value.get(); }
156 };
157 
158 class PythonObject {
159 public:
160   PythonObject() : m_py_obj(nullptr) {}
161 
162   PythonObject(PyRefType type, PyObject *py_obj) : m_py_obj(nullptr) {
163     Reset(type, py_obj);
164   }
165 
166   PythonObject(const PythonObject &rhs) : m_py_obj(nullptr) { Reset(rhs); }
167 
168   PythonObject(PythonObject &&rhs) {
169     m_py_obj = rhs.m_py_obj;
170     rhs.m_py_obj = nullptr;
171   }
172 
173   virtual ~PythonObject() { Reset(); }
174 
175   void Reset() {
176     // Avoid calling the virtual method since it's not necessary
177     // to actually validate the type of the PyObject if we're
178     // just setting to null.
179     if (m_py_obj && Py_IsInitialized())
180       Py_DECREF(m_py_obj);
181     m_py_obj = nullptr;
182   }
183 
184   void Reset(const PythonObject &rhs) {
185     // Avoid calling the virtual method if it's not necessary
186     // to actually validate the type of the PyObject.
187     if (!rhs.IsValid())
188       Reset();
189     else
190       Reset(PyRefType::Borrowed, rhs.m_py_obj);
191   }
192 
193   // PythonObject is implicitly convertible to PyObject *, which will call the
194   // wrong overload.  We want to explicitly disallow this, since a PyObject
195   // *always* owns its reference.  Therefore the overload which takes a
196   // PyRefType doesn't make sense, and the copy constructor should be used.
197   void Reset(PyRefType type, const PythonObject &ref) = delete;
198 
199   // FIXME We shouldn't have virtual anything.  PythonObject should be a
200   // strictly pass-by-value type.
201   virtual void Reset(PyRefType type, PyObject *py_obj) {
202     if (py_obj == m_py_obj)
203       return;
204 
205     if (Py_IsInitialized())
206       Py_XDECREF(m_py_obj);
207 
208     m_py_obj = py_obj;
209 
210     // If this is a borrowed reference, we need to convert it to
211     // an owned reference by incrementing it.  If it is an owned
212     // reference (for example the caller allocated it with PyDict_New()
213     // then we must *not* increment it.
214     if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed)
215       Py_XINCREF(m_py_obj);
216   }
217 
218   void Dump() const {
219     if (m_py_obj)
220       _PyObject_Dump(m_py_obj);
221     else
222       puts("NULL");
223   }
224 
225   void Dump(Stream &strm) const;
226 
227   PyObject *get() const { return m_py_obj; }
228 
229   PyObject *release() {
230     PyObject *result = m_py_obj;
231     m_py_obj = nullptr;
232     return result;
233   }
234 
235   PythonObject &operator=(const PythonObject &other) {
236     Reset(PyRefType::Borrowed, other.get());
237     return *this;
238   }
239 
240   void Reset(PythonObject &&other) {
241     Reset();
242     m_py_obj = other.m_py_obj;
243     other.m_py_obj = nullptr;
244   }
245 
246   PythonObject &operator=(PythonObject &&other) {
247     Reset(std::move(other));
248     return *this;
249   }
250 
251   PyObjectType GetObjectType() const;
252 
253   PythonString Repr() const;
254 
255   PythonString Str() const;
256 
257   static PythonObject ResolveNameWithDictionary(llvm::StringRef name,
258                                                 const PythonDictionary &dict);
259 
260   template <typename T>
261   static T ResolveNameWithDictionary(llvm::StringRef name,
262                                      const PythonDictionary &dict) {
263     return ResolveNameWithDictionary(name, dict).AsType<T>();
264   }
265 
266   PythonObject ResolveName(llvm::StringRef name) const;
267 
268   template <typename T> T ResolveName(llvm::StringRef name) const {
269     return ResolveName(name).AsType<T>();
270   }
271 
272   bool HasAttribute(llvm::StringRef attribute) const;
273 
274   PythonObject GetAttributeValue(llvm::StringRef attribute) const;
275 
276   bool IsNone() const { return m_py_obj == Py_None; }
277 
278   bool IsValid() const { return m_py_obj != nullptr; }
279 
280   bool IsAllocated() const { return IsValid() && !IsNone(); }
281 
282   explicit operator bool() const { return IsValid() && !IsNone(); }
283 
284   template <typename T> T AsType() const {
285     if (!T::Check(m_py_obj))
286       return T();
287     return T(PyRefType::Borrowed, m_py_obj);
288   }
289 
290   StructuredData::ObjectSP CreateStructuredObject() const;
291 
292 protected:
293   static llvm::Error nullDeref() {
294     return llvm::createStringError(llvm::inconvertibleErrorCode(),
295                                    "A NULL PyObject* was dereferenced");
296   }
297   static llvm::Error exception(const char *s = nullptr) {
298     return llvm::make_error<PythonException>(s);
299   }
300 
301 public:
302   template <typename... T>
303   llvm::Expected<PythonObject> CallMethod(const char *name,
304                                           const T &... t) const {
305     const char format[] = {'(', PythonFormat<T>::format..., ')', 0};
306 #if PY_MAJOR_VERSION < 3
307     PyObject *obj = PyObject_CallMethod(m_py_obj, const_cast<char *>(name),
308                                         const_cast<char *>(format),
309                                         PythonFormat<T>::get(t)...);
310 #else
311     PyObject *obj =
312         PyObject_CallMethod(m_py_obj, name, format, PythonFormat<T>::get(t)...);
313 #endif
314     if (!obj)
315       return exception();
316     return python::Take<PythonObject>(obj);
317   }
318 
319   llvm::Expected<PythonObject> GetAttribute(const char *name) const {
320     if (!m_py_obj)
321       return nullDeref();
322     PyObject *obj = PyObject_GetAttrString(m_py_obj, name);
323     if (!obj)
324       return exception();
325     return python::Take<PythonObject>(obj);
326   }
327 
328   llvm::Expected<bool> IsTrue() {
329     if (!m_py_obj)
330       return nullDeref();
331     int r = PyObject_IsTrue(m_py_obj);
332     if (r < 0)
333       return exception();
334     return !!r;
335   }
336 
337   llvm::Expected<long long> AsLongLong() {
338     if (!m_py_obj)
339       return nullDeref();
340     assert(!PyErr_Occurred());
341     long long r = PyLong_AsLongLong(m_py_obj);
342     if (PyErr_Occurred())
343       return exception();
344     return r;
345   }
346 
347   llvm::Expected<bool> IsInstance(const PythonObject &cls) {
348     if (!m_py_obj || !cls.IsValid())
349       return nullDeref();
350     int r = PyObject_IsInstance(m_py_obj, cls.get());
351     if (r < 0)
352       return exception();
353     return !!r;
354   }
355 
356 protected:
357   PyObject *m_py_obj;
358 };
359 
360 namespace python {
361 
362 // This is why C++ needs monads.
363 template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) {
364   if (!obj)
365     return obj.takeError();
366   if (!T::Check(obj.get().get()))
367     return llvm::createStringError(llvm::inconvertibleErrorCode(),
368                                    "type error");
369   return T(PyRefType::Borrowed, std::move(obj.get().get()));
370 }
371 
// Explicit specializations for plain C++ result types, which have no
// T::Check for the generic template to call.  Declared here, defined out of
// line.
template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj);
376 
377 } // namespace python
378 
// PythonObject subclass for byte-string objects (see PyObjectType::Bytes).
// All members are defined out of line; per the notice at the top of this
// file, members that do not return Error/Expected are legacy API.
class PythonBytes : public PythonObject {
public:
  PythonBytes();
  // NOTE(review): presumably these two create a new Python object from the
  // given bytes -- confirm in the implementation file.
  explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes);
  PythonBytes(const uint8_t *bytes, size_t length);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonBytes(PyRefType type, PyObject *o);

  ~PythonBytes() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};
403 
// PythonObject subclass for byte-array objects (see PyObjectType::ByteArray).
// Its interface mirrors PythonBytes, plus a converting constructor from
// PythonBytes.  All members are defined out of line.
class PythonByteArray : public PythonObject {
public:
  PythonByteArray();
  // NOTE(review): presumably these two create a new Python object from the
  // given bytes -- confirm in the implementation file.
  explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes);
  PythonByteArray(const uint8_t *bytes, size_t length);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonByteArray(PyRefType type, PyObject *o);
  // Not explicit, so a PythonBytes converts implicitly to a PythonByteArray.
  PythonByteArray(const PythonBytes &object);

  ~PythonByteArray() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};
429 
// PythonObject subclass for string objects (see PyObjectType::String).
// FromUTF8() and AsUTF8() are the error-reporting entry points; the members
// marked "safe" swallow failures as described next to them.  All members are
// defined out of line.
class PythonString : public PythonObject {
public:
  // Error-reporting factory; prefer this over the StringRef constructor when
  // the caller needs to know why construction failed.
  static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string);

  PythonString();
  explicit PythonString(llvm::StringRef string); // safe, null on error
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonString(PyRefType type, PyObject *o);

  ~PythonString() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  llvm::StringRef GetString() const; // safe, empty string on error

  // Error-reporting counterpart of GetString().
  llvm::Expected<llvm::StringRef> AsUTF8() const;

  size_t GetSize() const;

  void SetString(llvm::StringRef string); // safe, null on error

  StructuredData::StringSP CreateStructuredString() const;
};
457 
// PythonObject subclass for integer objects (see PyObjectType::Integer),
// exchanged with C++ as int64_t.  All members are defined out of line; none
// of them report errors through Error/Expected.
class PythonInteger : public PythonObject {
public:
  PythonInteger();
  explicit PythonInteger(int64_t value);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonInteger(PyRefType type, PyObject *o);

  ~PythonInteger() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  int64_t GetInteger() const;

  void SetInteger(int64_t value);

  StructuredData::IntegerSP CreateStructuredInteger() const;
};
479 
// PythonObject subclass for boolean objects (see PyObjectType::Boolean).
// The default constructor and destructor are compiler-generated, so a
// default-constructed PythonBoolean is an invalid (null) handle.  The other
// members are defined out of line.
class PythonBoolean : public PythonObject {
public:
  PythonBoolean() = default;
  explicit PythonBoolean(bool value);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonBoolean(PyRefType type, PyObject *o);

  ~PythonBoolean() override = default;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  bool GetValue() const;

  void SetValue(bool value);

  StructuredData::BooleanSP CreateStructuredBoolean() const;
};
501 
// PythonObject subclass for list objects (see PyObjectType::List).
// Apart from the inline default constructor, members are defined out of
// line; none of them report errors through Error/Expected.
class PythonList : public PythonObject {
public:
  // Produces an invalid (null) handle.
  PythonList() {}
  // NOTE(review): presumably PyInitialValue::Empty creates a new empty list --
  // confirm in the implementation file.
  explicit PythonList(PyInitialValue value);
  explicit PythonList(int list_size);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonList(PyRefType type, PyObject *o);

  ~PythonList() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  uint32_t GetSize() const;

  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  void AppendItem(const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};
528 
// PythonObject subclass for tuple objects (see PyObjectType::Tuple).
// Apart from the inline default constructor, members are defined out of
// line; none of them report errors through Error/Expected.
class PythonTuple : public PythonObject {
public:
  // Produces an invalid (null) handle.
  PythonTuple() {}
  // NOTE(review): presumably PyInitialValue::Empty creates a new empty tuple
  // -- confirm in the implementation file.
  explicit PythonTuple(PyInitialValue value);
  explicit PythonTuple(int tuple_size);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonTuple(PyRefType type, PyObject *o);
  // Construct from a braced list of wrappers or of raw pointers.
  // NOTE(review): reference handling for the raw-pointer form is not visible
  // here -- check the implementation before relying on it.
  PythonTuple(std::initializer_list<PythonObject> objects);
  PythonTuple(std::initializer_list<PyObject *> objects);

  ~PythonTuple() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  uint32_t GetSize() const;

  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};
555 
// PythonObject subclass for dictionary objects (see
// PyObjectType::Dictionary).  Apart from the inline default constructor,
// members are defined out of line; none of them report errors through
// Error/Expected.
class PythonDictionary : public PythonObject {
public:
  // Produces an invalid (null) handle.
  PythonDictionary() {}
  // NOTE(review): presumably PyInitialValue::Empty creates a new empty
  // dictionary -- confirm in the implementation file.
  explicit PythonDictionary(PyInitialValue value);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonDictionary(PyRefType type, PyObject *o);

  ~PythonDictionary() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  uint32_t GetSize() const;

  PythonList GetKeys() const;

  PythonObject GetItemForKey(const PythonObject &key) const;
  void SetItemForKey(const PythonObject &key, const PythonObject &value);

  StructuredData::DictionarySP CreateStructuredDictionary() const;
};
580 
581 class PythonModule : public PythonObject {
582 public:
583   PythonModule();
584   PythonModule(PyRefType type, PyObject *o);
585 
586   ~PythonModule() override;
587 
588   static bool Check(PyObject *py_obj);
589 
590   static PythonModule BuiltinsModule();
591 
592   static PythonModule MainModule();
593 
594   static PythonModule AddModule(llvm::StringRef module);
595 
596   // safe, returns invalid on error;
597   static PythonModule ImportModule(llvm::StringRef name) {
598     std::string s = name;
599     auto mod = Import(s.c_str());
600     if (!mod) {
601       llvm::consumeError(mod.takeError());
602       return PythonModule();
603     }
604     return std::move(mod.get());
605   }
606 
607   static llvm::Expected<PythonModule> Import(const char *name);
608 
609   llvm::Expected<PythonObject> Get(const char *name);
610 
611   // Bring in the no-argument base class version
612   using PythonObject::Reset;
613 
614   void Reset(PyRefType type, PyObject *py_obj) override;
615 
616   PythonDictionary GetDictionary() const;
617 };
618 
// PythonObject subclass for callable objects (see PyObjectType::Callable).
// Apart from the variadic operator() template, members are defined out of
// line; none of them report errors through Error/Expected.
class PythonCallable : public PythonObject {
public:
  // Result of GetNumArguments() / GetNumInitArguments().
  // NOTE(review): whether `count` includes the bound `self` argument is not
  // visible here -- confirm in the implementation file.
  struct ArgInfo {
    size_t count;
    bool is_bound_method : 1;
    bool has_varargs : 1;
    bool has_kwargs : 1;
  };

  PythonCallable();
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonCallable(PyRefType type, PyObject *o);

  ~PythonCallable() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the override declared
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;

  ArgInfo GetNumArguments() const;

  // If the callable is a Py_Class, then find the number of arguments
  // of the __init__ method.
  ArgInfo GetNumInitArguments() const;

  // Invoke the callable with no arguments, with raw-pointer arguments, or
  // with wrapper arguments.
  PythonObject operator()();

  PythonObject operator()(std::initializer_list<PyObject *> args);

  PythonObject operator()(std::initializer_list<PythonObject> args);

  // Variadic convenience form: packs the arguments into a braced list and
  // forwards to one of the initializer_list overloads above.  Which one is
  // selected depends on the argument types.
  template <typename Arg, typename... Args>
  PythonObject operator()(const Arg &arg, Args... args) {
    return operator()({arg, args...});
  }
};
657 
// PythonObject subclass for file objects (see PyObjectType::File), with
// conversions to and from lldb's File abstraction.  All members are defined
// out of line; the ConvertToFile* members are the error-reporting ones.
class PythonFile : public PythonObject {
public:
  PythonFile();
  // NOTE(review): presumably creates a Python file object on top of `file`
  // opened with `mode` -- confirm in the implementation file.
  PythonFile(File &file, const char *mode);
  // Wraps an existing object; `type` states whether the caller's reference is
  // borrowed or owned.
  PythonFile(PyRefType type, PyObject *o);

  ~PythonFile() override;

  // Type predicate consulted by PythonObject::AsType<T>() and python::As<T>()
  // before a raw pointer is wrapped as this class.
  static bool Check(PyObject *py_obj);

  // Re-expose the base-class Reset overloads, which the Reset declarations
  // below would otherwise hide.
  using PythonObject::Reset;

  void Reset(PyRefType type, PyObject *py_obj) override;
  // Reset counterpart of the (File &, const char *) constructor.
  void Reset(File &file, const char *mode);

  lldb::FileUP GetUnderlyingFile() const;

  // Error-reporting conversions to an lldb File.
  // NOTE(review): the meaning of `borrowed` (likely: do not close the
  // underlying Python file when the returned File goes away) is not visible
  // here -- confirm in the implementation file.
  llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false);
  llvm::Expected<lldb::FileSP>
  ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false);
};
679 
// llvm::Error payload representing a Python exception.  It is created through
// llvm::make_error<PythonException>(...) by PythonObject::exception(), and
// unwrapOrSetPythonException() calls Restore() on it when handing an error
// back to Python.  All members are defined out of line.
class PythonException : public llvm::ErrorInfo<PythonException> {
private:
  // NOTE(review): presumably the (type, value, traceback) triple fetched from
  // the interpreter at construction, plus a cached repr used by toCString() --
  // confirm in the implementation file.
  PyObject *m_exception_type, *m_exception, *m_traceback;
  PyObject *m_repr_bytes;

public:
  // Type-identification tag used by the llvm::ErrorInfo machinery.
  static char ID;
  const char *toCString() const;
  // `caller` is an optional context string supplied by the code that detected
  // the failure.
  PythonException(const char *caller = nullptr);
  // Invoked by unwrapOrSetPythonException().
  // NOTE(review): presumably re-raises the captured exception in the
  // interpreter -- confirm in the implementation file.
  void Restore();
  ~PythonException();
  // llvm::ErrorInfo interface.
  void log(llvm::raw_ostream &OS) const override;
  std::error_code convertToErrorCode() const override;
};
694 
695 // This extracts the underlying T out of an Expected<T> and returns it.
696 // If the Expected is an Error instead of a T, that error will be converted
697 // into a python exception, and this will return a default-constructed T.
698 //
699 // This is appropriate for use right at the boundary of python calling into
700 // C++, such as in a SWIG typemap.   In such a context you should simply
701 // check if the returned T is valid, and if it is, return a NULL back
702 // to python.   This will result in the Error being raised as an exception
703 // from python code's point of view.
704 //
705 // For example:
706 // ```
707 // Expected<Foo *> efoop = some_cpp_function();
708 // Foo *foop = unwrapOrSetPythonException(efoop);
709 // if (!foop)
710 //    return NULL;
711 // do_something(*foop);
712 //
713 // If the Error returned was itself created because a python exception was
714 // raised when C++ code called into python, then the original exception
715 // will be restored.   Otherwise a simple string exception will be raised.
716 template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) {
717   if (expected)
718     return expected.get();
719   llvm::handleAllErrors(
720       expected.takeError(), [](PythonException &E) { E.Restore(); },
721       [](const llvm::ErrorInfoBase &E) {
722         PyErr_SetString(PyExc_Exception, E.message().c_str());
723       });
724   return T();
725 }
726 
727 } // namespace lldb_private
728 
729 #endif
730 
731 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
732