//===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//
// !! FIXME FIXME FIXME !!
//
// Nearly all Python APIs can return an exception.  They do this
// by returning NULL, or -1, or some such value and setting
// the exception state with PyErr_Set*().  Exceptions must be
// handled before further python API functions are called.  Failure
// to do so will result in asserts on debug builds of python.
// It will also sometimes, but not usually, result in crashes of
// release builds.
//
// Nearly all the code in this header does not handle python exceptions
// correctly.  It should all be converted to return Expected<> or
// Error types to capture the exception.
//
// Everything in this file except functions that return Error or
// Expected<> is considered deprecated and should not be
// used in new code.  If you need to use it, fix it first.
27 // 28 29 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 30 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 31 32 #ifndef LLDB_DISABLE_PYTHON 33 34 // LLDB Python header must be included first 35 #include "lldb-python.h" 36 37 #include "lldb/Host/File.h" 38 #include "lldb/Utility/StructuredData.h" 39 40 #include "llvm/ADT/ArrayRef.h" 41 42 namespace lldb_private { 43 44 class PythonObject; 45 class PythonBytes; 46 class PythonString; 47 class PythonList; 48 class PythonDictionary; 49 class PythonInteger; 50 class PythonException; 51 52 class StructuredPythonObject : public StructuredData::Generic { 53 public: 54 StructuredPythonObject() : StructuredData::Generic() {} 55 56 StructuredPythonObject(void *obj) : StructuredData::Generic(obj) { 57 Py_XINCREF(GetValue()); 58 } 59 60 ~StructuredPythonObject() override { 61 if (Py_IsInitialized()) 62 Py_XDECREF(GetValue()); 63 SetValue(nullptr); 64 } 65 66 bool IsValid() const override { return GetValue() && GetValue() != Py_None; } 67 68 void Serialize(llvm::json::OStream &s) const override; 69 70 private: 71 DISALLOW_COPY_AND_ASSIGN(StructuredPythonObject); 72 }; 73 74 enum class PyObjectType { 75 Unknown, 76 None, 77 Boolean, 78 Integer, 79 Dictionary, 80 List, 81 String, 82 Bytes, 83 ByteArray, 84 Module, 85 Callable, 86 Tuple, 87 File 88 }; 89 90 enum class PyRefType { 91 Borrowed, // We are not given ownership of the incoming PyObject. 92 // We cannot safely hold it without calling Py_INCREF. 93 Owned // We have ownership of the incoming PyObject. We should 94 // not call Py_INCREF. 95 }; 96 97 namespace python { 98 99 // Take a reference that you already own, and turn it into 100 // a PythonObject. 101 // 102 // Most python API methods will return a +1 reference 103 // if they succeed or NULL if and only if 104 // they set an exception. Use this to collect such return 105 // values, after checking for NULL. 
106 // 107 // If T is not just PythonObject, then obj must be already be 108 // checked to be of the correct type. 109 template <typename T> T Take(PyObject *obj) { 110 assert(obj); 111 assert(!PyErr_Occurred()); 112 T thing(PyRefType::Owned, obj); 113 assert(thing.IsValid()); 114 return std::move(thing); 115 } 116 117 // Retain a reference you have borrowed, and turn it into 118 // a PythonObject. 119 // 120 // A minority of python APIs return a borrowed reference 121 // instead of a +1. They will also return NULL if and only 122 // if they set an exception. Use this to collect such return 123 // values, after checking for NULL. 124 // 125 // If T is not just PythonObject, then obj must be already be 126 // checked to be of the correct type. 127 template <typename T> T Retain(PyObject *obj) { 128 assert(obj); 129 assert(!PyErr_Occurred()); 130 T thing(PyRefType::Borrowed, obj); 131 assert(thing.IsValid()); 132 return std::move(thing); 133 } 134 135 } // namespace python 136 137 enum class PyInitialValue { Invalid, Empty }; 138 139 template <typename T, typename Enable = void> struct PythonFormat; 140 141 template <> struct PythonFormat<unsigned long long> { 142 static constexpr char format = 'K'; 143 static auto get(unsigned long long value) { return value; } 144 }; 145 146 template <> struct PythonFormat<long long> { 147 static constexpr char format = 'L'; 148 static auto get(long long value) { return value; } 149 }; 150 151 template <typename T> 152 struct PythonFormat< 153 T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> { 154 static constexpr char format = 'O'; 155 static auto get(const T &value) { return value.get(); } 156 }; 157 158 class PythonObject { 159 public: 160 PythonObject() : m_py_obj(nullptr) {} 161 162 PythonObject(PyRefType type, PyObject *py_obj) : m_py_obj(nullptr) { 163 Reset(type, py_obj); 164 } 165 166 PythonObject(const PythonObject &rhs) : m_py_obj(nullptr) { Reset(rhs); } 167 168 PythonObject(PythonObject &&rhs) 
{ 169 m_py_obj = rhs.m_py_obj; 170 rhs.m_py_obj = nullptr; 171 } 172 173 virtual ~PythonObject() { Reset(); } 174 175 void Reset() { 176 // Avoid calling the virtual method since it's not necessary 177 // to actually validate the type of the PyObject if we're 178 // just setting to null. 179 if (m_py_obj && Py_IsInitialized()) 180 Py_DECREF(m_py_obj); 181 m_py_obj = nullptr; 182 } 183 184 void Reset(const PythonObject &rhs) { 185 // Avoid calling the virtual method if it's not necessary 186 // to actually validate the type of the PyObject. 187 if (!rhs.IsValid()) 188 Reset(); 189 else 190 Reset(PyRefType::Borrowed, rhs.m_py_obj); 191 } 192 193 // PythonObject is implicitly convertible to PyObject *, which will call the 194 // wrong overload. We want to explicitly disallow this, since a PyObject 195 // *always* owns its reference. Therefore the overload which takes a 196 // PyRefType doesn't make sense, and the copy constructor should be used. 197 void Reset(PyRefType type, const PythonObject &ref) = delete; 198 199 // FIXME We shouldn't have virtual anything. PythonObject should be a 200 // strictly pass-by-value type. 201 virtual void Reset(PyRefType type, PyObject *py_obj) { 202 if (py_obj == m_py_obj) 203 return; 204 205 if (Py_IsInitialized()) 206 Py_XDECREF(m_py_obj); 207 208 m_py_obj = py_obj; 209 210 // If this is a borrowed reference, we need to convert it to 211 // an owned reference by incrementing it. If it is an owned 212 // reference (for example the caller allocated it with PyDict_New() 213 // then we must *not* increment it. 
214 if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed) 215 Py_XINCREF(m_py_obj); 216 } 217 218 void Dump() const { 219 if (m_py_obj) 220 _PyObject_Dump(m_py_obj); 221 else 222 puts("NULL"); 223 } 224 225 void Dump(Stream &strm) const; 226 227 PyObject *get() const { return m_py_obj; } 228 229 PyObject *release() { 230 PyObject *result = m_py_obj; 231 m_py_obj = nullptr; 232 return result; 233 } 234 235 PythonObject &operator=(const PythonObject &other) { 236 Reset(PyRefType::Borrowed, other.get()); 237 return *this; 238 } 239 240 void Reset(PythonObject &&other) { 241 Reset(); 242 m_py_obj = other.m_py_obj; 243 other.m_py_obj = nullptr; 244 } 245 246 PythonObject &operator=(PythonObject &&other) { 247 Reset(std::move(other)); 248 return *this; 249 } 250 251 PyObjectType GetObjectType() const; 252 253 PythonString Repr() const; 254 255 PythonString Str() const; 256 257 static PythonObject ResolveNameWithDictionary(llvm::StringRef name, 258 const PythonDictionary &dict); 259 260 template <typename T> 261 static T ResolveNameWithDictionary(llvm::StringRef name, 262 const PythonDictionary &dict) { 263 return ResolveNameWithDictionary(name, dict).AsType<T>(); 264 } 265 266 PythonObject ResolveName(llvm::StringRef name) const; 267 268 template <typename T> T ResolveName(llvm::StringRef name) const { 269 return ResolveName(name).AsType<T>(); 270 } 271 272 bool HasAttribute(llvm::StringRef attribute) const; 273 274 PythonObject GetAttributeValue(llvm::StringRef attribute) const; 275 276 bool IsNone() const { return m_py_obj == Py_None; } 277 278 bool IsValid() const { return m_py_obj != nullptr; } 279 280 bool IsAllocated() const { return IsValid() && !IsNone(); } 281 282 explicit operator bool() const { return IsValid() && !IsNone(); } 283 284 template <typename T> T AsType() const { 285 if (!T::Check(m_py_obj)) 286 return T(); 287 return T(PyRefType::Borrowed, m_py_obj); 288 } 289 290 StructuredData::ObjectSP CreateStructuredObject() const; 291 292 
protected: 293 static llvm::Error nullDeref() { 294 return llvm::createStringError(llvm::inconvertibleErrorCode(), 295 "A NULL PyObject* was dereferenced"); 296 } 297 static llvm::Error exception(const char *s = nullptr) { 298 return llvm::make_error<PythonException>(s); 299 } 300 301 public: 302 template <typename... T> 303 llvm::Expected<PythonObject> CallMethod(const char *name, 304 const T &... t) const { 305 const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; 306 #if PY_MAJOR_VERSION < 3 307 PyObject *obj = PyObject_CallMethod(m_py_obj, const_cast<char *>(name), 308 const_cast<char *>(format), 309 PythonFormat<T>::get(t)...); 310 #else 311 PyObject *obj = 312 PyObject_CallMethod(m_py_obj, name, format, PythonFormat<T>::get(t)...); 313 #endif 314 if (!obj) 315 return exception(); 316 return python::Take<PythonObject>(obj); 317 } 318 319 llvm::Expected<PythonObject> GetAttribute(const char *name) const { 320 if (!m_py_obj) 321 return nullDeref(); 322 PyObject *obj = PyObject_GetAttrString(m_py_obj, name); 323 if (!obj) 324 return exception(); 325 return python::Take<PythonObject>(obj); 326 } 327 328 llvm::Expected<bool> IsTrue() { 329 if (!m_py_obj) 330 return nullDeref(); 331 int r = PyObject_IsTrue(m_py_obj); 332 if (r < 0) 333 return exception(); 334 return !!r; 335 } 336 337 llvm::Expected<long long> AsLongLong() { 338 if (!m_py_obj) 339 return nullDeref(); 340 assert(!PyErr_Occurred()); 341 long long r = PyLong_AsLongLong(m_py_obj); 342 if (PyErr_Occurred()) 343 return exception(); 344 return r; 345 } 346 347 llvm::Expected<bool> IsInstance(const PythonObject &cls) { 348 if (!m_py_obj || !cls.IsValid()) 349 return nullDeref(); 350 int r = PyObject_IsInstance(m_py_obj, cls.get()); 351 if (r < 0) 352 return exception(); 353 return !!r; 354 } 355 356 protected: 357 PyObject *m_py_obj; 358 }; 359 360 namespace python { 361 362 // This is why C++ needs monads. 
363 template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) { 364 if (!obj) 365 return obj.takeError(); 366 if (!T::Check(obj.get().get())) 367 return llvm::createStringError(llvm::inconvertibleErrorCode(), 368 "type error"); 369 return T(PyRefType::Borrowed, std::move(obj.get().get())); 370 } 371 372 template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj); 373 374 template <> 375 llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj); 376 377 } // namespace python 378 379 class PythonBytes : public PythonObject { 380 public: 381 PythonBytes(); 382 explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes); 383 PythonBytes(const uint8_t *bytes, size_t length); 384 PythonBytes(PyRefType type, PyObject *o); 385 386 ~PythonBytes() override; 387 388 static bool Check(PyObject *py_obj); 389 390 // Bring in the no-argument base class version 391 using PythonObject::Reset; 392 393 void Reset(PyRefType type, PyObject *py_obj) override; 394 395 llvm::ArrayRef<uint8_t> GetBytes() const; 396 397 size_t GetSize() const; 398 399 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 400 401 StructuredData::StringSP CreateStructuredString() const; 402 }; 403 404 class PythonByteArray : public PythonObject { 405 public: 406 PythonByteArray(); 407 explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes); 408 PythonByteArray(const uint8_t *bytes, size_t length); 409 PythonByteArray(PyRefType type, PyObject *o); 410 PythonByteArray(const PythonBytes &object); 411 412 ~PythonByteArray() override; 413 414 static bool Check(PyObject *py_obj); 415 416 // Bring in the no-argument base class version 417 using PythonObject::Reset; 418 419 void Reset(PyRefType type, PyObject *py_obj) override; 420 421 llvm::ArrayRef<uint8_t> GetBytes() const; 422 423 size_t GetSize() const; 424 425 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 426 427 StructuredData::StringSP CreateStructuredString() const; 428 }; 429 430 class PythonString : 
public PythonObject { 431 public: 432 static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string); 433 434 PythonString(); 435 explicit PythonString(llvm::StringRef string); // safe, null on error 436 PythonString(PyRefType type, PyObject *o); 437 438 ~PythonString() override; 439 440 static bool Check(PyObject *py_obj); 441 442 // Bring in the no-argument base class version 443 using PythonObject::Reset; 444 445 void Reset(PyRefType type, PyObject *py_obj) override; 446 447 llvm::StringRef GetString() const; // safe, empty string on error 448 449 llvm::Expected<llvm::StringRef> AsUTF8() const; 450 451 size_t GetSize() const; 452 453 void SetString(llvm::StringRef string); // safe, null on error 454 455 StructuredData::StringSP CreateStructuredString() const; 456 }; 457 458 class PythonInteger : public PythonObject { 459 public: 460 PythonInteger(); 461 explicit PythonInteger(int64_t value); 462 PythonInteger(PyRefType type, PyObject *o); 463 464 ~PythonInteger() override; 465 466 static bool Check(PyObject *py_obj); 467 468 // Bring in the no-argument base class version 469 using PythonObject::Reset; 470 471 void Reset(PyRefType type, PyObject *py_obj) override; 472 473 int64_t GetInteger() const; 474 475 void SetInteger(int64_t value); 476 477 StructuredData::IntegerSP CreateStructuredInteger() const; 478 }; 479 480 class PythonBoolean : public PythonObject { 481 public: 482 PythonBoolean() = default; 483 explicit PythonBoolean(bool value); 484 PythonBoolean(PyRefType type, PyObject *o); 485 486 ~PythonBoolean() override = default; 487 488 static bool Check(PyObject *py_obj); 489 490 // Bring in the no-argument base class version 491 using PythonObject::Reset; 492 493 void Reset(PyRefType type, PyObject *py_obj) override; 494 495 bool GetValue() const; 496 497 void SetValue(bool value); 498 499 StructuredData::BooleanSP CreateStructuredBoolean() const; 500 }; 501 502 class PythonList : public PythonObject { 503 public: 504 PythonList() {} 505 explicit 
PythonList(PyInitialValue value); 506 explicit PythonList(int list_size); 507 PythonList(PyRefType type, PyObject *o); 508 509 ~PythonList() override; 510 511 static bool Check(PyObject *py_obj); 512 513 // Bring in the no-argument base class version 514 using PythonObject::Reset; 515 516 void Reset(PyRefType type, PyObject *py_obj) override; 517 518 uint32_t GetSize() const; 519 520 PythonObject GetItemAtIndex(uint32_t index) const; 521 522 void SetItemAtIndex(uint32_t index, const PythonObject &object); 523 524 void AppendItem(const PythonObject &object); 525 526 StructuredData::ArraySP CreateStructuredArray() const; 527 }; 528 529 class PythonTuple : public PythonObject { 530 public: 531 PythonTuple() {} 532 explicit PythonTuple(PyInitialValue value); 533 explicit PythonTuple(int tuple_size); 534 PythonTuple(PyRefType type, PyObject *o); 535 PythonTuple(std::initializer_list<PythonObject> objects); 536 PythonTuple(std::initializer_list<PyObject *> objects); 537 538 ~PythonTuple() override; 539 540 static bool Check(PyObject *py_obj); 541 542 // Bring in the no-argument base class version 543 using PythonObject::Reset; 544 545 void Reset(PyRefType type, PyObject *py_obj) override; 546 547 uint32_t GetSize() const; 548 549 PythonObject GetItemAtIndex(uint32_t index) const; 550 551 void SetItemAtIndex(uint32_t index, const PythonObject &object); 552 553 StructuredData::ArraySP CreateStructuredArray() const; 554 }; 555 556 class PythonDictionary : public PythonObject { 557 public: 558 PythonDictionary() {} 559 explicit PythonDictionary(PyInitialValue value); 560 PythonDictionary(PyRefType type, PyObject *o); 561 562 ~PythonDictionary() override; 563 564 static bool Check(PyObject *py_obj); 565 566 // Bring in the no-argument base class version 567 using PythonObject::Reset; 568 569 void Reset(PyRefType type, PyObject *py_obj) override; 570 571 uint32_t GetSize() const; 572 573 PythonList GetKeys() const; 574 575 PythonObject GetItemForKey(const PythonObject &key) 
const; 576 void SetItemForKey(const PythonObject &key, const PythonObject &value); 577 578 StructuredData::DictionarySP CreateStructuredDictionary() const; 579 }; 580 581 class PythonModule : public PythonObject { 582 public: 583 PythonModule(); 584 PythonModule(PyRefType type, PyObject *o); 585 586 ~PythonModule() override; 587 588 static bool Check(PyObject *py_obj); 589 590 static PythonModule BuiltinsModule(); 591 592 static PythonModule MainModule(); 593 594 static PythonModule AddModule(llvm::StringRef module); 595 596 // safe, returns invalid on error; 597 static PythonModule ImportModule(llvm::StringRef name) { 598 std::string s = name; 599 auto mod = Import(s.c_str()); 600 if (!mod) { 601 llvm::consumeError(mod.takeError()); 602 return PythonModule(); 603 } 604 return std::move(mod.get()); 605 } 606 607 static llvm::Expected<PythonModule> Import(const char *name); 608 609 llvm::Expected<PythonObject> Get(const char *name); 610 611 // Bring in the no-argument base class version 612 using PythonObject::Reset; 613 614 void Reset(PyRefType type, PyObject *py_obj) override; 615 616 PythonDictionary GetDictionary() const; 617 }; 618 619 class PythonCallable : public PythonObject { 620 public: 621 struct ArgInfo { 622 size_t count; 623 bool is_bound_method : 1; 624 bool has_varargs : 1; 625 bool has_kwargs : 1; 626 }; 627 628 PythonCallable(); 629 PythonCallable(PyRefType type, PyObject *o); 630 631 ~PythonCallable() override; 632 633 static bool Check(PyObject *py_obj); 634 635 // Bring in the no-argument base class version 636 using PythonObject::Reset; 637 638 void Reset(PyRefType type, PyObject *py_obj) override; 639 640 ArgInfo GetNumArguments() const; 641 642 // If the callable is a Py_Class, then find the number of arguments 643 // of the __init__ method. 
644 ArgInfo GetNumInitArguments() const; 645 646 PythonObject operator()(); 647 648 PythonObject operator()(std::initializer_list<PyObject *> args); 649 650 PythonObject operator()(std::initializer_list<PythonObject> args); 651 652 template <typename Arg, typename... Args> 653 PythonObject operator()(const Arg &arg, Args... args) { 654 return operator()({arg, args...}); 655 } 656 }; 657 658 class PythonFile : public PythonObject { 659 public: 660 PythonFile(); 661 PythonFile(File &file, const char *mode); 662 PythonFile(PyRefType type, PyObject *o); 663 664 ~PythonFile() override; 665 666 static bool Check(PyObject *py_obj); 667 668 using PythonObject::Reset; 669 670 void Reset(PyRefType type, PyObject *py_obj) override; 671 void Reset(File &file, const char *mode); 672 673 lldb::FileUP GetUnderlyingFile() const; 674 675 llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false); 676 llvm::Expected<lldb::FileSP> 677 ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false); 678 }; 679 680 class PythonException : public llvm::ErrorInfo<PythonException> { 681 private: 682 PyObject *m_exception_type, *m_exception, *m_traceback; 683 PyObject *m_repr_bytes; 684 685 public: 686 static char ID; 687 const char *toCString() const; 688 PythonException(const char *caller = nullptr); 689 void Restore(); 690 ~PythonException(); 691 void log(llvm::raw_ostream &OS) const override; 692 std::error_code convertToErrorCode() const override; 693 }; 694 695 // This extracts the underlying T out of an Expected<T> and returns it. 696 // If the Expected is an Error instead of a T, that error will be converted 697 // into a python exception, and this will return a default-constructed T. 698 // 699 // This is appropriate for use right at the boundary of python calling into 700 // C++, such as in a SWIG typemap. In such a context you should simply 701 // check if the returned T is valid, and if it is, return a NULL back 702 // to python. 
This will result in the Error being raised as an exception 703 // from python code's point of view. 704 // 705 // For example: 706 // ``` 707 // Expected<Foo *> efoop = some_cpp_function(); 708 // Foo *foop = unwrapOrSetPythonException(efoop); 709 // if (!foop) 710 // return NULL; 711 // do_something(*foop); 712 // 713 // If the Error returned was itself created because a python exception was 714 // raised when C++ code called into python, then the original exception 715 // will be restored. Otherwise a simple string exception will be raised. 716 template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) { 717 if (expected) 718 return expected.get(); 719 llvm::handleAllErrors( 720 expected.takeError(), [](PythonException &E) { E.Restore(); }, 721 [](const llvm::ErrorInfoBase &E) { 722 PyErr_SetString(PyExc_Exception, E.message().c_str()); 723 }); 724 return T(); 725 } 726 727 } // namespace lldb_private 728 729 #endif 730 731 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 732