//===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//
// !! FIXME FIXME FIXME !!
//
// Python APIs nearly all can return an exception.  They do this
// by returning NULL, or -1, or some such value and setting
// the exception state with PyErr_Set*().  Exceptions must be
// handled before further python API functions are called.  Failure
// to do so will result in asserts on debug builds of python.
// It will also sometimes, but not usually, result in crashes of
// release builds.
//
// Nearly all the code in this header does not handle python exceptions
// correctly.  It should all be converted to return Expected<> or
// Error types to capture the exception.
//
// Everything in this file except functions that return Error or
// Expected<> is considered deprecated and should not be
// used in new code.  If you need to use it, fix it first.
//
//
// TODOs for this file
//
// * Make all methods safe for exceptions.
//
// * Eliminate method signatures that must translate exceptions into
//   empty objects or NULLs.  Almost everything here should return
//   Expected<>.  It should be acceptable for certain operations that
//   can never fail to assert instead, such as the creation of
//   PythonString from a string literal.
//
// * Eliminate Reset(), and make all non-default constructors private.
//   Python objects should be created with Retain<> or Take<>, and they
//   should be assigned with operator=
//
// * Eliminate default constructors, make python objects always
//   nonnull, and use optionals where necessary.
45 // 46 47 48 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 49 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 50 51 #ifndef LLDB_DISABLE_PYTHON 52 53 // LLDB Python header must be included first 54 #include "lldb-python.h" 55 56 #include "lldb/Host/File.h" 57 #include "lldb/Utility/StructuredData.h" 58 59 #include "llvm/ADT/ArrayRef.h" 60 61 namespace lldb_private { 62 63 class PythonObject; 64 class PythonBytes; 65 class PythonString; 66 class PythonList; 67 class PythonDictionary; 68 class PythonInteger; 69 class PythonException; 70 71 class StructuredPythonObject : public StructuredData::Generic { 72 public: 73 StructuredPythonObject() : StructuredData::Generic() {} 74 75 StructuredPythonObject(void *obj) : StructuredData::Generic(obj) { 76 Py_XINCREF(GetValue()); 77 } 78 79 ~StructuredPythonObject() override { 80 if (Py_IsInitialized()) 81 Py_XDECREF(GetValue()); 82 SetValue(nullptr); 83 } 84 85 bool IsValid() const override { return GetValue() && GetValue() != Py_None; } 86 87 void Serialize(llvm::json::OStream &s) const override; 88 89 private: 90 DISALLOW_COPY_AND_ASSIGN(StructuredPythonObject); 91 }; 92 93 enum class PyObjectType { 94 Unknown, 95 None, 96 Boolean, 97 Integer, 98 Dictionary, 99 List, 100 String, 101 Bytes, 102 ByteArray, 103 Module, 104 Callable, 105 Tuple, 106 File 107 }; 108 109 enum class PyRefType { 110 Borrowed, // We are not given ownership of the incoming PyObject. 111 // We cannot safely hold it without calling Py_INCREF. 112 Owned // We have ownership of the incoming PyObject. We should 113 // not call Py_INCREF. 114 }; 115 116 namespace python { 117 118 // Take a reference that you already own, and turn it into 119 // a PythonObject. 120 // 121 // Most python API methods will return a +1 reference 122 // if they succeed or NULL if and only if 123 // they set an exception. Use this to collect such return 124 // values, after checking for NULL. 
125 // 126 // If T is not just PythonObject, then obj must be already be 127 // checked to be of the correct type. 128 template <typename T> T Take(PyObject *obj) { 129 assert(obj); 130 assert(!PyErr_Occurred()); 131 T thing(PyRefType::Owned, obj); 132 assert(thing.IsValid()); 133 return std::move(thing); 134 } 135 136 // Retain a reference you have borrowed, and turn it into 137 // a PythonObject. 138 // 139 // A minority of python APIs return a borrowed reference 140 // instead of a +1. They will also return NULL if and only 141 // if they set an exception. Use this to collect such return 142 // values, after checking for NULL. 143 // 144 // If T is not just PythonObject, then obj must be already be 145 // checked to be of the correct type. 146 template <typename T> T Retain(PyObject *obj) { 147 assert(obj); 148 assert(!PyErr_Occurred()); 149 T thing(PyRefType::Borrowed, obj); 150 assert(thing.IsValid()); 151 return std::move(thing); 152 } 153 154 } // namespace python 155 156 enum class PyInitialValue { Invalid, Empty }; 157 158 template <typename T, typename Enable = void> struct PythonFormat; 159 160 template <> struct PythonFormat<unsigned long long> { 161 static constexpr char format = 'K'; 162 static auto get(unsigned long long value) { return value; } 163 }; 164 165 template <> struct PythonFormat<long long> { 166 static constexpr char format = 'L'; 167 static auto get(long long value) { return value; } 168 }; 169 170 template <typename T> 171 struct PythonFormat< 172 T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> { 173 static constexpr char format = 'O'; 174 static auto get(const T &value) { return value.get(); } 175 }; 176 177 class PythonObject { 178 public: 179 PythonObject() : m_py_obj(nullptr) {} 180 181 PythonObject(PyRefType type, PyObject *py_obj) : m_py_obj(nullptr) { 182 Reset(type, py_obj); 183 } 184 185 PythonObject(const PythonObject &rhs) : m_py_obj(nullptr) { Reset(rhs); } 186 187 PythonObject(PythonObject &&rhs) 
{ 188 m_py_obj = rhs.m_py_obj; 189 rhs.m_py_obj = nullptr; 190 } 191 192 ~PythonObject() { Reset(); } 193 194 void Reset() { 195 if (m_py_obj && Py_IsInitialized()) 196 Py_DECREF(m_py_obj); 197 m_py_obj = nullptr; 198 } 199 200 void Reset(const PythonObject &rhs) { 201 if (!rhs.IsValid()) 202 Reset(); 203 else 204 Reset(PyRefType::Borrowed, rhs.m_py_obj); 205 } 206 207 // PythonObject is implicitly convertible to PyObject *, which will call the 208 // wrong overload. We want to explicitly disallow this, since a PyObject 209 // *always* owns its reference. Therefore the overload which takes a 210 // PyRefType doesn't make sense, and the copy constructor should be used. 211 void Reset(PyRefType type, const PythonObject &ref) = delete; 212 213 void Reset(PyRefType type, PyObject *py_obj) { 214 if (py_obj == m_py_obj) 215 return; 216 217 if (Py_IsInitialized()) 218 Py_XDECREF(m_py_obj); 219 220 m_py_obj = py_obj; 221 222 // If this is a borrowed reference, we need to convert it to 223 // an owned reference by incrementing it. If it is an owned 224 // reference (for example the caller allocated it with PyDict_New() 225 // then we must *not* increment it. 
226 if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed) 227 Py_XINCREF(m_py_obj); 228 } 229 230 void Dump() const { 231 if (m_py_obj) 232 _PyObject_Dump(m_py_obj); 233 else 234 puts("NULL"); 235 } 236 237 void Dump(Stream &strm) const; 238 239 PyObject *get() const { return m_py_obj; } 240 241 PyObject *release() { 242 PyObject *result = m_py_obj; 243 m_py_obj = nullptr; 244 return result; 245 } 246 247 PythonObject &operator=(const PythonObject &other) { 248 Reset(PyRefType::Borrowed, other.get()); 249 return *this; 250 } 251 252 void Reset(PythonObject &&other) { 253 Reset(); 254 m_py_obj = other.m_py_obj; 255 other.m_py_obj = nullptr; 256 } 257 258 PythonObject &operator=(PythonObject &&other) { 259 Reset(std::move(other)); 260 return *this; 261 } 262 263 PyObjectType GetObjectType() const; 264 265 PythonString Repr() const; 266 267 PythonString Str() const; 268 269 static PythonObject ResolveNameWithDictionary(llvm::StringRef name, 270 const PythonDictionary &dict); 271 272 template <typename T> 273 static T ResolveNameWithDictionary(llvm::StringRef name, 274 const PythonDictionary &dict) { 275 return ResolveNameWithDictionary(name, dict).AsType<T>(); 276 } 277 278 PythonObject ResolveName(llvm::StringRef name) const; 279 280 template <typename T> T ResolveName(llvm::StringRef name) const { 281 return ResolveName(name).AsType<T>(); 282 } 283 284 bool HasAttribute(llvm::StringRef attribute) const; 285 286 PythonObject GetAttributeValue(llvm::StringRef attribute) const; 287 288 bool IsNone() const { return m_py_obj == Py_None; } 289 290 bool IsValid() const { return m_py_obj != nullptr; } 291 292 bool IsAllocated() const { return IsValid() && !IsNone(); } 293 294 explicit operator bool() const { return IsValid() && !IsNone(); } 295 296 template <typename T> T AsType() const { 297 if (!T::Check(m_py_obj)) 298 return T(); 299 return T(PyRefType::Borrowed, m_py_obj); 300 } 301 302 StructuredData::ObjectSP CreateStructuredObject() const; 303 304 
protected: 305 static llvm::Error nullDeref() { 306 return llvm::createStringError(llvm::inconvertibleErrorCode(), 307 "A NULL PyObject* was dereferenced"); 308 } 309 static llvm::Error exception(const char *s = nullptr) { 310 return llvm::make_error<PythonException>(s); 311 } 312 313 public: 314 template <typename... T> 315 llvm::Expected<PythonObject> CallMethod(const char *name, 316 const T &... t) const { 317 const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; 318 #if PY_MAJOR_VERSION < 3 319 PyObject *obj = PyObject_CallMethod(m_py_obj, const_cast<char *>(name), 320 const_cast<char *>(format), 321 PythonFormat<T>::get(t)...); 322 #else 323 PyObject *obj = 324 PyObject_CallMethod(m_py_obj, name, format, PythonFormat<T>::get(t)...); 325 #endif 326 if (!obj) 327 return exception(); 328 return python::Take<PythonObject>(obj); 329 } 330 331 llvm::Expected<PythonObject> GetAttribute(const char *name) const { 332 if (!m_py_obj) 333 return nullDeref(); 334 PyObject *obj = PyObject_GetAttrString(m_py_obj, name); 335 if (!obj) 336 return exception(); 337 return python::Take<PythonObject>(obj); 338 } 339 340 llvm::Expected<bool> IsTrue() { 341 if (!m_py_obj) 342 return nullDeref(); 343 int r = PyObject_IsTrue(m_py_obj); 344 if (r < 0) 345 return exception(); 346 return !!r; 347 } 348 349 llvm::Expected<long long> AsLongLong() { 350 if (!m_py_obj) 351 return nullDeref(); 352 assert(!PyErr_Occurred()); 353 long long r = PyLong_AsLongLong(m_py_obj); 354 if (PyErr_Occurred()) 355 return exception(); 356 return r; 357 } 358 359 llvm::Expected<bool> IsInstance(const PythonObject &cls) { 360 if (!m_py_obj || !cls.IsValid()) 361 return nullDeref(); 362 int r = PyObject_IsInstance(m_py_obj, cls.get()); 363 if (r < 0) 364 return exception(); 365 return !!r; 366 } 367 368 protected: 369 PyObject *m_py_obj; 370 }; 371 372 namespace python { 373 374 // This is why C++ needs monads. 
375 template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) { 376 if (!obj) 377 return obj.takeError(); 378 if (!T::Check(obj.get().get())) 379 return llvm::createStringError(llvm::inconvertibleErrorCode(), 380 "type error"); 381 return T(PyRefType::Borrowed, std::move(obj.get().get())); 382 } 383 384 template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj); 385 386 template <> 387 llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj); 388 389 } // namespace python 390 391 template <class T> class TypedPythonObject : public PythonObject { 392 public: 393 // override to perform implicit type conversions on Reset 394 // This can be eliminated once we drop python 2 support. 395 static void Convert(PyRefType &type, PyObject *&py_obj) {} 396 397 using PythonObject::Reset; 398 399 void Reset(PyRefType type, PyObject *py_obj) { 400 Reset(); 401 if (!py_obj) 402 return; 403 T::Convert(type, py_obj); 404 if (T::Check(py_obj)) 405 PythonObject::Reset(type, py_obj); 406 else if (type == PyRefType::Owned) 407 Py_DECREF(py_obj); 408 } 409 410 TypedPythonObject(PyRefType type, PyObject *py_obj) { Reset(type, py_obj); } 411 412 TypedPythonObject() {} 413 }; 414 415 class PythonBytes : public TypedPythonObject<PythonBytes> { 416 public: 417 using TypedPythonObject::TypedPythonObject; 418 explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes); 419 PythonBytes(const uint8_t *bytes, size_t length); 420 421 static bool Check(PyObject *py_obj); 422 423 llvm::ArrayRef<uint8_t> GetBytes() const; 424 425 size_t GetSize() const; 426 427 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 428 429 StructuredData::StringSP CreateStructuredString() const; 430 }; 431 432 class PythonByteArray : public TypedPythonObject<PythonByteArray> { 433 public: 434 using TypedPythonObject::TypedPythonObject; 435 explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes); 436 PythonByteArray(const uint8_t *bytes, size_t length); 437 
PythonByteArray(const PythonBytes &object); 438 439 static bool Check(PyObject *py_obj); 440 441 llvm::ArrayRef<uint8_t> GetBytes() const; 442 443 size_t GetSize() const; 444 445 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 446 447 StructuredData::StringSP CreateStructuredString() const; 448 }; 449 450 class PythonString : public TypedPythonObject<PythonString> { 451 public: 452 using TypedPythonObject::TypedPythonObject; 453 static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string); 454 455 PythonString() : TypedPythonObject() {} // MSVC requires this for some reason 456 457 explicit PythonString(llvm::StringRef string); // safe, null on error 458 459 static bool Check(PyObject *py_obj); 460 static void Convert(PyRefType &type, PyObject *&py_obj); 461 462 llvm::StringRef GetString() const; // safe, empty string on error 463 464 llvm::Expected<llvm::StringRef> AsUTF8() const; 465 466 size_t GetSize() const; 467 468 void SetString(llvm::StringRef string); // safe, null on error 469 470 StructuredData::StringSP CreateStructuredString() const; 471 }; 472 473 class PythonInteger : public TypedPythonObject<PythonInteger> { 474 public: 475 using TypedPythonObject::TypedPythonObject; 476 477 PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason 478 479 explicit PythonInteger(int64_t value); 480 481 static bool Check(PyObject *py_obj); 482 static void Convert(PyRefType &type, PyObject *&py_obj); 483 484 int64_t GetInteger() const; 485 486 void SetInteger(int64_t value); 487 488 StructuredData::IntegerSP CreateStructuredInteger() const; 489 }; 490 491 class PythonBoolean : public TypedPythonObject<PythonBoolean> { 492 public: 493 using TypedPythonObject::TypedPythonObject; 494 495 explicit PythonBoolean(bool value); 496 497 static bool Check(PyObject *py_obj); 498 499 bool GetValue() const; 500 501 void SetValue(bool value); 502 503 StructuredData::BooleanSP CreateStructuredBoolean() const; 504 }; 505 506 class PythonList : public 
TypedPythonObject<PythonList> { 507 public: 508 using TypedPythonObject::TypedPythonObject; 509 510 PythonList() : TypedPythonObject() {} // MSVC requires this for some reason 511 512 explicit PythonList(PyInitialValue value); 513 explicit PythonList(int list_size); 514 515 static bool Check(PyObject *py_obj); 516 517 uint32_t GetSize() const; 518 519 PythonObject GetItemAtIndex(uint32_t index) const; 520 521 void SetItemAtIndex(uint32_t index, const PythonObject &object); 522 523 void AppendItem(const PythonObject &object); 524 525 StructuredData::ArraySP CreateStructuredArray() const; 526 }; 527 528 class PythonTuple : public TypedPythonObject<PythonTuple> { 529 public: 530 using TypedPythonObject::TypedPythonObject; 531 532 explicit PythonTuple(PyInitialValue value); 533 explicit PythonTuple(int tuple_size); 534 PythonTuple(std::initializer_list<PythonObject> objects); 535 PythonTuple(std::initializer_list<PyObject *> objects); 536 537 static bool Check(PyObject *py_obj); 538 539 uint32_t GetSize() const; 540 541 PythonObject GetItemAtIndex(uint32_t index) const; 542 543 void SetItemAtIndex(uint32_t index, const PythonObject &object); 544 545 StructuredData::ArraySP CreateStructuredArray() const; 546 }; 547 548 class PythonDictionary : public TypedPythonObject<PythonDictionary> { 549 public: 550 using TypedPythonObject::TypedPythonObject; 551 552 PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason 553 554 explicit PythonDictionary(PyInitialValue value); 555 556 static bool Check(PyObject *py_obj); 557 558 uint32_t GetSize() const; 559 560 PythonList GetKeys() const; 561 562 PythonObject GetItemForKey(const PythonObject &key) const; 563 void SetItemForKey(const PythonObject &key, const PythonObject &value); 564 565 StructuredData::DictionarySP CreateStructuredDictionary() const; 566 }; 567 568 class PythonModule : public TypedPythonObject<PythonModule> { 569 public: 570 using TypedPythonObject::TypedPythonObject; 571 572 static bool 
Check(PyObject *py_obj); 573 574 static PythonModule BuiltinsModule(); 575 576 static PythonModule MainModule(); 577 578 static PythonModule AddModule(llvm::StringRef module); 579 580 // safe, returns invalid on error; 581 static PythonModule ImportModule(llvm::StringRef name) { 582 std::string s = name; 583 auto mod = Import(s.c_str()); 584 if (!mod) { 585 llvm::consumeError(mod.takeError()); 586 return PythonModule(); 587 } 588 return std::move(mod.get()); 589 } 590 591 static llvm::Expected<PythonModule> Import(const char *name); 592 593 llvm::Expected<PythonObject> Get(const char *name); 594 595 PythonDictionary GetDictionary() const; 596 }; 597 598 class PythonCallable : public TypedPythonObject<PythonCallable> { 599 public: 600 using TypedPythonObject::TypedPythonObject; 601 602 struct ArgInfo { 603 size_t count; 604 bool is_bound_method : 1; 605 bool has_varargs : 1; 606 bool has_kwargs : 1; 607 }; 608 609 static bool Check(PyObject *py_obj); 610 611 ArgInfo GetNumArguments() const; 612 613 // If the callable is a Py_Class, then find the number of arguments 614 // of the __init__ method. 615 ArgInfo GetNumInitArguments() const; 616 617 PythonObject operator()(); 618 619 PythonObject operator()(std::initializer_list<PyObject *> args); 620 621 PythonObject operator()(std::initializer_list<PythonObject> args); 622 623 template <typename Arg, typename... Args> 624 PythonObject operator()(const Arg &arg, Args... 
args) { 625 return operator()({arg, args...}); 626 } 627 }; 628 629 class PythonFile : public TypedPythonObject<PythonFile> { 630 public: 631 using TypedPythonObject::TypedPythonObject; 632 633 PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason 634 635 static bool Check(PyObject *py_obj); 636 637 static llvm::Expected<PythonFile> FromFile(File &file, 638 const char *mode = nullptr); 639 640 llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false); 641 llvm::Expected<lldb::FileSP> 642 ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false); 643 }; 644 645 class PythonException : public llvm::ErrorInfo<PythonException> { 646 private: 647 PyObject *m_exception_type, *m_exception, *m_traceback; 648 PyObject *m_repr_bytes; 649 650 public: 651 static char ID; 652 const char *toCString() const; 653 PythonException(const char *caller = nullptr); 654 void Restore(); 655 ~PythonException(); 656 void log(llvm::raw_ostream &OS) const override; 657 std::error_code convertToErrorCode() const override; 658 }; 659 660 // This extracts the underlying T out of an Expected<T> and returns it. 661 // If the Expected is an Error instead of a T, that error will be converted 662 // into a python exception, and this will return a default-constructed T. 663 // 664 // This is appropriate for use right at the boundary of python calling into 665 // C++, such as in a SWIG typemap. In such a context you should simply 666 // check if the returned T is valid, and if it is, return a NULL back 667 // to python. This will result in the Error being raised as an exception 668 // from python code's point of view. 
669 // 670 // For example: 671 // ``` 672 // Expected<Foo *> efoop = some_cpp_function(); 673 // Foo *foop = unwrapOrSetPythonException(efoop); 674 // if (!foop) 675 // return NULL; 676 // do_something(*foop); 677 // 678 // If the Error returned was itself created because a python exception was 679 // raised when C++ code called into python, then the original exception 680 // will be restored. Otherwise a simple string exception will be raised. 681 template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) { 682 if (expected) 683 return expected.get(); 684 llvm::handleAllErrors( 685 expected.takeError(), [](PythonException &E) { E.Restore(); }, 686 [](const llvm::ErrorInfoBase &E) { 687 PyErr_SetString(PyExc_Exception, E.message().c_str()); 688 }); 689 return T(); 690 } 691 692 } // namespace lldb_private 693 694 #endif 695 696 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 697