1 //===-- PythonDataObjects.h--------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // 10 // !! FIXME FIXME FIXME !! 11 // 12 // Python APIs nearly all can return an exception. They do this 13 // by returning NULL, or -1, or some such value and setting 14 // the exception state with PyErr_Set*(). Exceptions must be 15 // handled before further python API functions are called. Failure 16 // to do so will result in asserts on debug builds of python. 17 // It will also sometimes, but not usually result in crashes of 18 // release builds. 19 // 20 // Nearly all the code in this header does not handle python exceptions 21 // correctly. It should all be converted to return Expected<> or 22 // Error types to capture the exception. 23 // 24 // Everything in this file except functions that return Error or 25 // Expected<> is considered deprecated and should not be 26 // used in new code. If you need to use it, fix it first. 27 // 28 // 29 // TODOs for this file 30 // 31 // * Make all methods safe for exceptions. 32 // 33 // * Eliminate method signatures that must translate exceptions into 34 // empty objects or NULLs. Almost everything here should return 35 // Expected<>. It should be acceptable for certain operations that 36 // can never fail to assert instead, such as the creation of 37 // PythonString from a string literal. 38 // 39 // * Elimintate Reset(), and make all non-default constructors private. 40 // Python objects should be created with Retain<> or Take<>, and they 41 // should be assigned with operator= 42 // 43 // * Eliminate default constructors, make python objects always 44 // nonnull, and use optionals where necessary. 45 // 46 47 48 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 49 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 50 51 #ifndef LLDB_DISABLE_PYTHON 52 53 // LLDB Python header must be included first 54 #include "lldb-python.h" 55 56 #include "lldb/Host/File.h" 57 #include "lldb/Utility/StructuredData.h" 58 59 #include "llvm/ADT/ArrayRef.h" 60 61 namespace lldb_private { 62 63 class PythonObject; 64 class PythonBytes; 65 class PythonString; 66 class PythonList; 67 class PythonDictionary; 68 class PythonInteger; 69 class PythonException; 70 71 class StructuredPythonObject : public StructuredData::Generic { 72 public: 73 StructuredPythonObject() : StructuredData::Generic() {} 74 75 StructuredPythonObject(void *obj) : StructuredData::Generic(obj) { 76 Py_XINCREF(GetValue()); 77 } 78 79 ~StructuredPythonObject() override { 80 if (Py_IsInitialized()) 81 Py_XDECREF(GetValue()); 82 SetValue(nullptr); 83 } 84 85 bool IsValid() const override { return GetValue() && GetValue() != Py_None; } 86 87 void Serialize(llvm::json::OStream &s) const override; 88 89 private: 90 DISALLOW_COPY_AND_ASSIGN(StructuredPythonObject); 91 }; 92 93 enum class PyObjectType { 94 Unknown, 95 None, 96 Boolean, 97 Integer, 98 Dictionary, 99 List, 100 String, 101 Bytes, 102 ByteArray, 103 Module, 104 Callable, 105 Tuple, 106 File 107 }; 108 109 enum class PyRefType { 110 Borrowed, // We are not given ownership of the incoming PyObject. 111 // We cannot safely hold it without calling Py_INCREF. 112 Owned // We have ownership of the incoming PyObject. We should 113 // not call Py_INCREF. 114 }; 115 116 namespace python { 117 118 // Take a reference that you already own, and turn it into 119 // a PythonObject. 120 // 121 // Most python API methods will return a +1 reference 122 // if they succeed or NULL if and only if 123 // they set an exception. Use this to collect such return 124 // values, after checking for NULL. 125 // 126 // If T is not just PythonObject, then obj must be already be 127 // checked to be of the correct type. 128 template <typename T> T Take(PyObject *obj) { 129 assert(obj); 130 assert(!PyErr_Occurred()); 131 T thing(PyRefType::Owned, obj); 132 assert(thing.IsValid()); 133 return std::move(thing); 134 } 135 136 // Retain a reference you have borrowed, and turn it into 137 // a PythonObject. 138 // 139 // A minority of python APIs return a borrowed reference 140 // instead of a +1. They will also return NULL if and only 141 // if they set an exception. Use this to collect such return 142 // values, after checking for NULL. 143 // 144 // If T is not just PythonObject, then obj must be already be 145 // checked to be of the correct type. 146 template <typename T> T Retain(PyObject *obj) { 147 assert(obj); 148 assert(!PyErr_Occurred()); 149 T thing(PyRefType::Borrowed, obj); 150 assert(thing.IsValid()); 151 return std::move(thing); 152 } 153 154 } // namespace python 155 156 enum class PyInitialValue { Invalid, Empty }; 157 158 template <typename T, typename Enable = void> struct PythonFormat; 159 160 template <> struct PythonFormat<unsigned long long> { 161 static constexpr char format = 'K'; 162 static auto get(unsigned long long value) { return value; } 163 }; 164 165 template <> struct PythonFormat<long long> { 166 static constexpr char format = 'L'; 167 static auto get(long long value) { return value; } 168 }; 169 170 template <typename T> 171 struct PythonFormat< 172 T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> { 173 static constexpr char format = 'O'; 174 static auto get(const T &value) { return value.get(); } 175 }; 176 177 class PythonObject { 178 public: 179 PythonObject() : m_py_obj(nullptr) {} 180 181 PythonObject(PyRefType type, PyObject *py_obj) : m_py_obj(nullptr) { 182 Reset(type, py_obj); 183 } 184 185 PythonObject(const PythonObject &rhs) : m_py_obj(nullptr) { Reset(rhs); } 186 187 PythonObject(PythonObject &&rhs) { 188 m_py_obj = rhs.m_py_obj; 189 rhs.m_py_obj = nullptr; 190 } 191 192 ~PythonObject() { Reset(); } 193 194 void Reset() { 195 if (m_py_obj && Py_IsInitialized()) 196 Py_DECREF(m_py_obj); 197 m_py_obj = nullptr; 198 } 199 200 void Reset(const PythonObject &rhs) { 201 if (!rhs.IsValid()) 202 Reset(); 203 else 204 Reset(PyRefType::Borrowed, rhs.m_py_obj); 205 } 206 207 // PythonObject is implicitly convertible to PyObject *, which will call the 208 // wrong overload. We want to explicitly disallow this, since a PyObject 209 // *always* owns its reference. Therefore the overload which takes a 210 // PyRefType doesn't make sense, and the copy constructor should be used. 211 void Reset(PyRefType type, const PythonObject &ref) = delete; 212 213 void Reset(PyRefType type, PyObject *py_obj) { 214 if (py_obj == m_py_obj) 215 return; 216 217 if (Py_IsInitialized()) 218 Py_XDECREF(m_py_obj); 219 220 m_py_obj = py_obj; 221 222 // If this is a borrowed reference, we need to convert it to 223 // an owned reference by incrementing it. If it is an owned 224 // reference (for example the caller allocated it with PyDict_New() 225 // then we must *not* increment it. 226 if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed) 227 Py_XINCREF(m_py_obj); 228 } 229 230 void Dump() const { 231 if (m_py_obj) 232 _PyObject_Dump(m_py_obj); 233 else 234 puts("NULL"); 235 } 236 237 void Dump(Stream &strm) const; 238 239 PyObject *get() const { return m_py_obj; } 240 241 PyObject *release() { 242 PyObject *result = m_py_obj; 243 m_py_obj = nullptr; 244 return result; 245 } 246 247 PythonObject &operator=(const PythonObject &other) { 248 Reset(PyRefType::Borrowed, other.get()); 249 return *this; 250 } 251 252 void Reset(PythonObject &&other) { 253 Reset(); 254 m_py_obj = other.m_py_obj; 255 other.m_py_obj = nullptr; 256 } 257 258 PythonObject &operator=(PythonObject &&other) { 259 Reset(std::move(other)); 260 return *this; 261 } 262 263 PyObjectType GetObjectType() const; 264 265 PythonString Repr() const; 266 267 PythonString Str() const; 268 269 static PythonObject ResolveNameWithDictionary(llvm::StringRef name, 270 const PythonDictionary &dict); 271 272 template <typename T> 273 static T ResolveNameWithDictionary(llvm::StringRef name, 274 const PythonDictionary &dict) { 275 return ResolveNameWithDictionary(name, dict).AsType<T>(); 276 } 277 278 PythonObject ResolveName(llvm::StringRef name) const; 279 280 template <typename T> T ResolveName(llvm::StringRef name) const { 281 return ResolveName(name).AsType<T>(); 282 } 283 284 bool HasAttribute(llvm::StringRef attribute) const; 285 286 PythonObject GetAttributeValue(llvm::StringRef attribute) const; 287 288 bool IsNone() const { return m_py_obj == Py_None; } 289 290 bool IsValid() const { return m_py_obj != nullptr; } 291 292 bool IsAllocated() const { return IsValid() && !IsNone(); } 293 294 explicit operator bool() const { return IsValid() && !IsNone(); } 295 296 template <typename T> T AsType() const { 297 if (!T::Check(m_py_obj)) 298 return T(); 299 return T(PyRefType::Borrowed, m_py_obj); 300 } 301 302 StructuredData::ObjectSP CreateStructuredObject() const; 303 304 protected: 305 static llvm::Error nullDeref() { 306 return llvm::createStringError(llvm::inconvertibleErrorCode(), 307 "A NULL PyObject* was dereferenced"); 308 } 309 static llvm::Error exception(const char *s = nullptr) { 310 return llvm::make_error<PythonException>(s); 311 } 312 static llvm::Error keyError() { 313 return llvm::createStringError(llvm::inconvertibleErrorCode(), 314 "key not in dict"); 315 } 316 317 #if PY_MAJOR_VERSION < 3 318 // The python 2 API declares some arguments as char* that should 319 // be const char *, but it doesn't actually modify them. 320 static char *py2_const_cast(const char *s) { return const_cast<char *>(s); } 321 #else 322 static const char *py2_const_cast(const char *s) { return s; } 323 #endif 324 325 public: 326 template <typename... T> 327 llvm::Expected<PythonObject> CallMethod(const char *name, 328 const T &... t) const { 329 const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; 330 PyObject *obj = 331 PyObject_CallMethod(m_py_obj, py2_const_cast(name), 332 py2_const_cast(format), PythonFormat<T>::get(t)...); 333 if (!obj) 334 return exception(); 335 return python::Take<PythonObject>(obj); 336 } 337 338 template <typename... T> 339 llvm::Expected<PythonObject> Call(const T &... t) const { 340 const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; 341 PyObject *obj = PyObject_CallFunction(m_py_obj, py2_const_cast(format), 342 PythonFormat<T>::get(t)...); 343 if (!obj) 344 return exception(); 345 return python::Take<PythonObject>(obj); 346 } 347 348 llvm::Expected<PythonObject> GetAttribute(const char *name) const { 349 if (!m_py_obj) 350 return nullDeref(); 351 PyObject *obj = PyObject_GetAttrString(m_py_obj, name); 352 if (!obj) 353 return exception(); 354 return python::Take<PythonObject>(obj); 355 } 356 357 llvm::Expected<bool> IsTrue() { 358 if (!m_py_obj) 359 return nullDeref(); 360 int r = PyObject_IsTrue(m_py_obj); 361 if (r < 0) 362 return exception(); 363 return !!r; 364 } 365 366 llvm::Expected<long long> AsLongLong() { 367 if (!m_py_obj) 368 return nullDeref(); 369 assert(!PyErr_Occurred()); 370 long long r = PyLong_AsLongLong(m_py_obj); 371 if (PyErr_Occurred()) 372 return exception(); 373 return r; 374 } 375 376 llvm::Expected<bool> IsInstance(const PythonObject &cls) { 377 if (!m_py_obj || !cls.IsValid()) 378 return nullDeref(); 379 int r = PyObject_IsInstance(m_py_obj, cls.get()); 380 if (r < 0) 381 return exception(); 382 return !!r; 383 } 384 385 protected: 386 PyObject *m_py_obj; 387 }; 388 389 namespace python { 390 391 // This is why C++ needs monads. 392 template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) { 393 if (!obj) 394 return obj.takeError(); 395 if (!T::Check(obj.get().get())) 396 return llvm::createStringError(llvm::inconvertibleErrorCode(), 397 "type error"); 398 return T(PyRefType::Borrowed, std::move(obj.get().get())); 399 } 400 401 template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj); 402 403 template <> 404 llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj); 405 406 template <> 407 llvm::Expected<std::string> As<std::string>(llvm::Expected<PythonObject> &&obj); 408 409 } // namespace python 410 411 template <class T> class TypedPythonObject : public PythonObject { 412 public: 413 // override to perform implicit type conversions on Reset 414 // This can be eliminated once we drop python 2 support. 415 static void Convert(PyRefType &type, PyObject *&py_obj) {} 416 417 using PythonObject::Reset; 418 419 void Reset(PyRefType type, PyObject *py_obj) { 420 Reset(); 421 if (!py_obj) 422 return; 423 T::Convert(type, py_obj); 424 if (T::Check(py_obj)) 425 PythonObject::Reset(type, py_obj); 426 else if (type == PyRefType::Owned) 427 Py_DECREF(py_obj); 428 } 429 430 TypedPythonObject(PyRefType type, PyObject *py_obj) { Reset(type, py_obj); } 431 432 TypedPythonObject() {} 433 }; 434 435 class PythonBytes : public TypedPythonObject<PythonBytes> { 436 public: 437 using TypedPythonObject::TypedPythonObject; 438 explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes); 439 PythonBytes(const uint8_t *bytes, size_t length); 440 441 static bool Check(PyObject *py_obj); 442 443 llvm::ArrayRef<uint8_t> GetBytes() const; 444 445 size_t GetSize() const; 446 447 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 448 449 StructuredData::StringSP CreateStructuredString() const; 450 }; 451 452 class PythonByteArray : public TypedPythonObject<PythonByteArray> { 453 public: 454 using TypedPythonObject::TypedPythonObject; 455 explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes); 456 PythonByteArray(const uint8_t *bytes, size_t length); 457 PythonByteArray(const PythonBytes &object); 458 459 static bool Check(PyObject *py_obj); 460 461 llvm::ArrayRef<uint8_t> GetBytes() const; 462 463 size_t GetSize() const; 464 465 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 466 467 StructuredData::StringSP CreateStructuredString() const; 468 }; 469 470 class PythonString : public TypedPythonObject<PythonString> { 471 public: 472 using TypedPythonObject::TypedPythonObject; 473 static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string); 474 475 PythonString() : TypedPythonObject() {} // MSVC requires this for some reason 476 477 explicit PythonString(llvm::StringRef string); // safe, null on error 478 479 static bool Check(PyObject *py_obj); 480 static void Convert(PyRefType &type, PyObject *&py_obj); 481 482 llvm::StringRef GetString() const; // safe, empty string on error 483 484 llvm::Expected<llvm::StringRef> AsUTF8() const; 485 486 size_t GetSize() const; 487 488 void SetString(llvm::StringRef string); // safe, null on error 489 490 StructuredData::StringSP CreateStructuredString() const; 491 }; 492 493 class PythonInteger : public TypedPythonObject<PythonInteger> { 494 public: 495 using TypedPythonObject::TypedPythonObject; 496 497 PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason 498 499 explicit PythonInteger(int64_t value); 500 501 static bool Check(PyObject *py_obj); 502 static void Convert(PyRefType &type, PyObject *&py_obj); 503 504 int64_t GetInteger() const; 505 506 void SetInteger(int64_t value); 507 508 StructuredData::IntegerSP CreateStructuredInteger() const; 509 }; 510 511 class PythonBoolean : public TypedPythonObject<PythonBoolean> { 512 public: 513 using TypedPythonObject::TypedPythonObject; 514 515 explicit PythonBoolean(bool value); 516 517 static bool Check(PyObject *py_obj); 518 519 bool GetValue() const; 520 521 void SetValue(bool value); 522 523 StructuredData::BooleanSP CreateStructuredBoolean() const; 524 }; 525 526 class PythonList : public TypedPythonObject<PythonList> { 527 public: 528 using TypedPythonObject::TypedPythonObject; 529 530 PythonList() : TypedPythonObject() {} // MSVC requires this for some reason 531 532 explicit PythonList(PyInitialValue value); 533 explicit PythonList(int list_size); 534 535 static bool Check(PyObject *py_obj); 536 537 uint32_t GetSize() const; 538 539 PythonObject GetItemAtIndex(uint32_t index) const; 540 541 void SetItemAtIndex(uint32_t index, const PythonObject &object); 542 543 void AppendItem(const PythonObject &object); 544 545 StructuredData::ArraySP CreateStructuredArray() const; 546 }; 547 548 class PythonTuple : public TypedPythonObject<PythonTuple> { 549 public: 550 using TypedPythonObject::TypedPythonObject; 551 552 explicit PythonTuple(PyInitialValue value); 553 explicit PythonTuple(int tuple_size); 554 PythonTuple(std::initializer_list<PythonObject> objects); 555 PythonTuple(std::initializer_list<PyObject *> objects); 556 557 static bool Check(PyObject *py_obj); 558 559 uint32_t GetSize() const; 560 561 PythonObject GetItemAtIndex(uint32_t index) const; 562 563 void SetItemAtIndex(uint32_t index, const PythonObject &object); 564 565 StructuredData::ArraySP CreateStructuredArray() const; 566 }; 567 568 class PythonDictionary : public TypedPythonObject<PythonDictionary> { 569 public: 570 using TypedPythonObject::TypedPythonObject; 571 572 PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason 573 574 explicit PythonDictionary(PyInitialValue value); 575 576 static bool Check(PyObject *py_obj); 577 578 uint32_t GetSize() const; 579 580 PythonList GetKeys() const; 581 582 PythonObject GetItemForKey(const PythonObject &key) const; // DEPRECATED 583 void SetItemForKey(const PythonObject &key, 584 const PythonObject &value); // DEPRECATED 585 586 llvm::Expected<PythonObject> GetItem(const PythonObject &key) const; 587 llvm::Expected<PythonObject> GetItem(const char *key) const; 588 llvm::Error SetItem(const PythonObject &key, const PythonObject &value) const; 589 llvm::Error SetItem(const char *key, const PythonObject &value) const; 590 591 StructuredData::DictionarySP CreateStructuredDictionary() const; 592 }; 593 594 class PythonModule : public TypedPythonObject<PythonModule> { 595 public: 596 using TypedPythonObject::TypedPythonObject; 597 598 static bool Check(PyObject *py_obj); 599 600 static PythonModule BuiltinsModule(); 601 602 static PythonModule MainModule(); 603 604 static PythonModule AddModule(llvm::StringRef module); 605 606 // safe, returns invalid on error; 607 static PythonModule ImportModule(llvm::StringRef name) { 608 std::string s = name; 609 auto mod = Import(s.c_str()); 610 if (!mod) { 611 llvm::consumeError(mod.takeError()); 612 return PythonModule(); 613 } 614 return std::move(mod.get()); 615 } 616 617 static llvm::Expected<PythonModule> Import(const char *name); 618 619 llvm::Expected<PythonObject> Get(const char *name); 620 621 PythonDictionary GetDictionary() const; 622 }; 623 624 class PythonCallable : public TypedPythonObject<PythonCallable> { 625 public: 626 using TypedPythonObject::TypedPythonObject; 627 628 struct ArgInfo { 629 /* the number of positional arguments, including optional ones, 630 * and excluding varargs. If this is a bound method, then the 631 * count will still include a +1 for self. 632 * 633 * FIXME. That's crazy. This should be replaced with 634 * an accurate min and max for positional args. 635 */ 636 int count; 637 /* does the callable have positional varargs? */ 638 bool has_varargs : 1; // FIXME delete this 639 /* is the callable a bound method written in python? */ 640 bool is_bound_method : 1; // FIXME delete this 641 }; 642 643 static bool Check(PyObject *py_obj); 644 645 llvm::Expected<ArgInfo> GetArgInfo() const; 646 647 llvm::Expected<ArgInfo> GetInitArgInfo() const; 648 649 ArgInfo GetNumArguments() const; // DEPRECATED 650 651 // If the callable is a Py_Class, then find the number of arguments 652 // of the __init__ method. 653 ArgInfo GetNumInitArguments() const; // DEPRECATED 654 655 PythonObject operator()(); 656 657 PythonObject operator()(std::initializer_list<PyObject *> args); 658 659 PythonObject operator()(std::initializer_list<PythonObject> args); 660 661 template <typename Arg, typename... Args> 662 PythonObject operator()(const Arg &arg, Args... args) { 663 return operator()({arg, args...}); 664 } 665 }; 666 667 class PythonFile : public TypedPythonObject<PythonFile> { 668 public: 669 using TypedPythonObject::TypedPythonObject; 670 671 PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason 672 673 static bool Check(PyObject *py_obj); 674 675 static llvm::Expected<PythonFile> FromFile(File &file, 676 const char *mode = nullptr); 677 678 llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false); 679 llvm::Expected<lldb::FileSP> 680 ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false); 681 }; 682 683 class PythonException : public llvm::ErrorInfo<PythonException> { 684 private: 685 PyObject *m_exception_type, *m_exception, *m_traceback; 686 PyObject *m_repr_bytes; 687 688 public: 689 static char ID; 690 const char *toCString() const; 691 PythonException(const char *caller = nullptr); 692 void Restore(); 693 ~PythonException(); 694 void log(llvm::raw_ostream &OS) const override; 695 std::error_code convertToErrorCode() const override; 696 }; 697 698 // This extracts the underlying T out of an Expected<T> and returns it. 699 // If the Expected is an Error instead of a T, that error will be converted 700 // into a python exception, and this will return a default-constructed T. 701 // 702 // This is appropriate for use right at the boundary of python calling into 703 // C++, such as in a SWIG typemap. In such a context you should simply 704 // check if the returned T is valid, and if it is, return a NULL back 705 // to python. This will result in the Error being raised as an exception 706 // from python code's point of view. 707 // 708 // For example: 709 // ``` 710 // Expected<Foo *> efoop = some_cpp_function(); 711 // Foo *foop = unwrapOrSetPythonException(efoop); 712 // if (!foop) 713 // return NULL; 714 // do_something(*foop); 715 // 716 // If the Error returned was itself created because a python exception was 717 // raised when C++ code called into python, then the original exception 718 // will be restored. Otherwise a simple string exception will be raised. 719 template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) { 720 if (expected) 721 return expected.get(); 722 llvm::handleAllErrors( 723 expected.takeError(), [](PythonException &E) { E.Restore(); }, 724 [](const llvm::ErrorInfoBase &E) { 725 PyErr_SetString(PyExc_Exception, E.message().c_str()); 726 }); 727 return T(); 728 } 729 730 } // namespace lldb_private 731 732 #endif 733 734 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 735