1 //===-- PythonDataObjects.h--------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // 10 // !! FIXME FIXME FIXME !! 11 // 12 // Python APIs nearly all can return an exception. They do this 13 // by returning NULL, or -1, or some such value and setting 14 // the exception state with PyErr_Set*(). Exceptions must be 15 // handled before further python API functions are called. Failure 16 // to do so will result in asserts on debug builds of python. 17 // It will also sometimes, but not usually result in crashes of 18 // release builds. 19 // 20 // Nearly all the code in this header does not handle python exceptions 21 // correctly. It should all be converted to return Expected<> or 22 // Error types to capture the exception. 23 // 24 // Everything in this file except functions that return Error or 25 // Expected<> is considered deprecated and should not be 26 // used in new code. If you need to use it, fix it first. 27 // 28 // 29 // TODOs for this file 30 // 31 // * Make all methods safe for exceptions. 32 // 33 // * Eliminate method signatures that must translate exceptions into 34 // empty objects or NULLs. Almost everything here should return 35 // Expected<>. It should be acceptable for certain operations that 36 // can never fail to assert instead, such as the creation of 37 // PythonString from a string literal. 38 // 39 // * Elimintate Reset(), and make all non-default constructors private. 40 // Python objects should be created with Retain<> or Take<>, and they 41 // should be assigned with operator= 42 // 43 // * Eliminate default constructors, make python objects always 44 // nonnull, and use optionals where necessary. 45 // 46 47 48 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 49 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 50 51 #ifndef LLDB_DISABLE_PYTHON 52 53 // LLDB Python header must be included first 54 #include "lldb-python.h" 55 56 #include "lldb/Host/File.h" 57 #include "lldb/Utility/StructuredData.h" 58 59 #include "llvm/ADT/ArrayRef.h" 60 61 namespace lldb_private { 62 63 class PythonObject; 64 class PythonBytes; 65 class PythonString; 66 class PythonList; 67 class PythonDictionary; 68 class PythonInteger; 69 class PythonException; 70 71 class StructuredPythonObject : public StructuredData::Generic { 72 public: 73 StructuredPythonObject() : StructuredData::Generic() {} 74 75 StructuredPythonObject(void *obj) : StructuredData::Generic(obj) { 76 Py_XINCREF(GetValue()); 77 } 78 79 ~StructuredPythonObject() override { 80 if (Py_IsInitialized()) 81 Py_XDECREF(GetValue()); 82 SetValue(nullptr); 83 } 84 85 bool IsValid() const override { return GetValue() && GetValue() != Py_None; } 86 87 void Serialize(llvm::json::OStream &s) const override; 88 89 private: 90 DISALLOW_COPY_AND_ASSIGN(StructuredPythonObject); 91 }; 92 93 enum class PyObjectType { 94 Unknown, 95 None, 96 Boolean, 97 Integer, 98 Dictionary, 99 List, 100 String, 101 Bytes, 102 ByteArray, 103 Module, 104 Callable, 105 Tuple, 106 File 107 }; 108 109 enum class PyRefType { 110 Borrowed, // We are not given ownership of the incoming PyObject. 111 // We cannot safely hold it without calling Py_INCREF. 112 Owned // We have ownership of the incoming PyObject. We should 113 // not call Py_INCREF. 114 }; 115 116 namespace python { 117 118 // Take a reference that you already own, and turn it into 119 // a PythonObject. 120 // 121 // Most python API methods will return a +1 reference 122 // if they succeed or NULL if and only if 123 // they set an exception. Use this to collect such return 124 // values, after checking for NULL. 125 // 126 // If T is not just PythonObject, then obj must be already be 127 // checked to be of the correct type. 128 template <typename T> T Take(PyObject *obj) { 129 assert(obj); 130 assert(!PyErr_Occurred()); 131 T thing(PyRefType::Owned, obj); 132 assert(thing.IsValid()); 133 return std::move(thing); 134 } 135 136 // Retain a reference you have borrowed, and turn it into 137 // a PythonObject. 138 // 139 // A minority of python APIs return a borrowed reference 140 // instead of a +1. They will also return NULL if and only 141 // if they set an exception. Use this to collect such return 142 // values, after checking for NULL. 143 // 144 // If T is not just PythonObject, then obj must be already be 145 // checked to be of the correct type. 146 template <typename T> T Retain(PyObject *obj) { 147 assert(obj); 148 assert(!PyErr_Occurred()); 149 T thing(PyRefType::Borrowed, obj); 150 assert(thing.IsValid()); 151 return std::move(thing); 152 } 153 154 } // namespace python 155 156 enum class PyInitialValue { Invalid, Empty }; 157 158 template <typename T, typename Enable = void> struct PythonFormat; 159 160 template <> struct PythonFormat<unsigned long long> { 161 static constexpr char format = 'K'; 162 static auto get(unsigned long long value) { return value; } 163 }; 164 165 template <> struct PythonFormat<long long> { 166 static constexpr char format = 'L'; 167 static auto get(long long value) { return value; } 168 }; 169 170 template <typename T> 171 struct PythonFormat< 172 T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> { 173 static constexpr char format = 'O'; 174 static auto get(const T &value) { return value.get(); } 175 }; 176 177 class PythonObject { 178 public: 179 PythonObject() : m_py_obj(nullptr) {} 180 181 PythonObject(PyRefType type, PyObject *py_obj) : m_py_obj(nullptr) { 182 Reset(type, py_obj); 183 } 184 185 PythonObject(const PythonObject &rhs) 186 : PythonObject(PyRefType::Borrowed, rhs.m_py_obj) {} 187 188 PythonObject(PythonObject &&rhs) { 189 m_py_obj = rhs.m_py_obj; 190 rhs.m_py_obj = nullptr; 191 } 192 193 ~PythonObject() { Reset(); } 194 195 void Reset() { 196 if (m_py_obj && Py_IsInitialized()) 197 Py_DECREF(m_py_obj); 198 m_py_obj = nullptr; 199 } 200 201 void Reset(PyRefType type, PyObject *py_obj) { 202 if (py_obj == m_py_obj) 203 return; 204 205 if (Py_IsInitialized()) 206 Py_XDECREF(m_py_obj); 207 208 m_py_obj = py_obj; 209 210 // If this is a borrowed reference, we need to convert it to 211 // an owned reference by incrementing it. If it is an owned 212 // reference (for example the caller allocated it with PyDict_New() 213 // then we must *not* increment it. 214 if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed) 215 Py_XINCREF(m_py_obj); 216 } 217 218 void Dump() const { 219 if (m_py_obj) 220 _PyObject_Dump(m_py_obj); 221 else 222 puts("NULL"); 223 } 224 225 void Dump(Stream &strm) const; 226 227 PyObject *get() const { return m_py_obj; } 228 229 PyObject *release() { 230 PyObject *result = m_py_obj; 231 m_py_obj = nullptr; 232 return result; 233 } 234 235 PythonObject &operator=(PythonObject other) { 236 Reset(); 237 m_py_obj = std::exchange(other.m_py_obj, nullptr); 238 return *this; 239 } 240 241 PyObjectType GetObjectType() const; 242 243 PythonString Repr() const; 244 245 PythonString Str() const; 246 247 static PythonObject ResolveNameWithDictionary(llvm::StringRef name, 248 const PythonDictionary &dict); 249 250 template <typename T> 251 static T ResolveNameWithDictionary(llvm::StringRef name, 252 const PythonDictionary &dict) { 253 return ResolveNameWithDictionary(name, dict).AsType<T>(); 254 } 255 256 PythonObject ResolveName(llvm::StringRef name) const; 257 258 template <typename T> T ResolveName(llvm::StringRef name) const { 259 return ResolveName(name).AsType<T>(); 260 } 261 262 bool HasAttribute(llvm::StringRef attribute) const; 263 264 PythonObject GetAttributeValue(llvm::StringRef attribute) const; 265 266 bool IsNone() const { return m_py_obj == Py_None; } 267 268 bool IsValid() const { return m_py_obj != nullptr; } 269 270 bool IsAllocated() const { return IsValid() && !IsNone(); } 271 272 explicit operator bool() const { return IsValid() && !IsNone(); } 273 274 template <typename T> T AsType() const { 275 if (!T::Check(m_py_obj)) 276 return T(); 277 return T(PyRefType::Borrowed, m_py_obj); 278 } 279 280 StructuredData::ObjectSP CreateStructuredObject() const; 281 282 protected: 283 static llvm::Error nullDeref() { 284 return llvm::createStringError(llvm::inconvertibleErrorCode(), 285 "A NULL PyObject* was dereferenced"); 286 } 287 static llvm::Error exception(const char *s = nullptr) { 288 return llvm::make_error<PythonException>(s); 289 } 290 static llvm::Error keyError() { 291 return llvm::createStringError(llvm::inconvertibleErrorCode(), 292 "key not in dict"); 293 } 294 295 #if PY_MAJOR_VERSION < 3 296 // The python 2 API declares some arguments as char* that should 297 // be const char *, but it doesn't actually modify them. 298 static char *py2_const_cast(const char *s) { return const_cast<char *>(s); } 299 #else 300 static const char *py2_const_cast(const char *s) { return s; } 301 #endif 302 303 public: 304 template <typename... T> 305 llvm::Expected<PythonObject> CallMethod(const char *name, 306 const T &... t) const { 307 const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; 308 PyObject *obj = 309 PyObject_CallMethod(m_py_obj, py2_const_cast(name), 310 py2_const_cast(format), PythonFormat<T>::get(t)...); 311 if (!obj) 312 return exception(); 313 return python::Take<PythonObject>(obj); 314 } 315 316 template <typename... T> 317 llvm::Expected<PythonObject> Call(const T &... t) const { 318 const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; 319 PyObject *obj = PyObject_CallFunction(m_py_obj, py2_const_cast(format), 320 PythonFormat<T>::get(t)...); 321 if (!obj) 322 return exception(); 323 return python::Take<PythonObject>(obj); 324 } 325 326 llvm::Expected<PythonObject> GetAttribute(const char *name) const { 327 if (!m_py_obj) 328 return nullDeref(); 329 PyObject *obj = PyObject_GetAttrString(m_py_obj, name); 330 if (!obj) 331 return exception(); 332 return python::Take<PythonObject>(obj); 333 } 334 335 llvm::Expected<bool> IsTrue() { 336 if (!m_py_obj) 337 return nullDeref(); 338 int r = PyObject_IsTrue(m_py_obj); 339 if (r < 0) 340 return exception(); 341 return !!r; 342 } 343 344 llvm::Expected<long long> AsLongLong() { 345 if (!m_py_obj) 346 return nullDeref(); 347 assert(!PyErr_Occurred()); 348 long long r = PyLong_AsLongLong(m_py_obj); 349 if (PyErr_Occurred()) 350 return exception(); 351 return r; 352 } 353 354 llvm::Expected<bool> IsInstance(const PythonObject &cls) { 355 if (!m_py_obj || !cls.IsValid()) 356 return nullDeref(); 357 int r = PyObject_IsInstance(m_py_obj, cls.get()); 358 if (r < 0) 359 return exception(); 360 return !!r; 361 } 362 363 protected: 364 PyObject *m_py_obj; 365 }; 366 367 namespace python { 368 369 // This is why C++ needs monads. 370 template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) { 371 if (!obj) 372 return obj.takeError(); 373 if (!T::Check(obj.get().get())) 374 return llvm::createStringError(llvm::inconvertibleErrorCode(), 375 "type error"); 376 return T(PyRefType::Borrowed, std::move(obj.get().get())); 377 } 378 379 template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj); 380 381 template <> 382 llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj); 383 384 template <> 385 llvm::Expected<std::string> As<std::string>(llvm::Expected<PythonObject> &&obj); 386 387 } // namespace python 388 389 template <class T> class TypedPythonObject : public PythonObject { 390 public: 391 // override to perform implicit type conversions on Reset 392 // This can be eliminated once we drop python 2 support. 393 static void Convert(PyRefType &type, PyObject *&py_obj) {} 394 395 using PythonObject::Reset; 396 397 void Reset(PyRefType type, PyObject *py_obj) { 398 Reset(); 399 if (!py_obj) 400 return; 401 T::Convert(type, py_obj); 402 if (T::Check(py_obj)) 403 PythonObject::Reset(type, py_obj); 404 else if (type == PyRefType::Owned) 405 Py_DECREF(py_obj); 406 } 407 408 TypedPythonObject(PyRefType type, PyObject *py_obj) { Reset(type, py_obj); } 409 410 TypedPythonObject() {} 411 }; 412 413 class PythonBytes : public TypedPythonObject<PythonBytes> { 414 public: 415 using TypedPythonObject::TypedPythonObject; 416 explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes); 417 PythonBytes(const uint8_t *bytes, size_t length); 418 419 static bool Check(PyObject *py_obj); 420 421 llvm::ArrayRef<uint8_t> GetBytes() const; 422 423 size_t GetSize() const; 424 425 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 426 427 StructuredData::StringSP CreateStructuredString() const; 428 }; 429 430 class PythonByteArray : public TypedPythonObject<PythonByteArray> { 431 public: 432 using TypedPythonObject::TypedPythonObject; 433 explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes); 434 PythonByteArray(const uint8_t *bytes, size_t length); 435 PythonByteArray(const PythonBytes &object); 436 437 static bool Check(PyObject *py_obj); 438 439 llvm::ArrayRef<uint8_t> GetBytes() const; 440 441 size_t GetSize() const; 442 443 void SetBytes(llvm::ArrayRef<uint8_t> stringbytes); 444 445 StructuredData::StringSP CreateStructuredString() const; 446 }; 447 448 class PythonString : public TypedPythonObject<PythonString> { 449 public: 450 using TypedPythonObject::TypedPythonObject; 451 static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string); 452 453 PythonString() : TypedPythonObject() {} // MSVC requires this for some reason 454 455 explicit PythonString(llvm::StringRef string); // safe, null on error 456 457 static bool Check(PyObject *py_obj); 458 static void Convert(PyRefType &type, PyObject *&py_obj); 459 460 llvm::StringRef GetString() const; // safe, empty string on error 461 462 llvm::Expected<llvm::StringRef> AsUTF8() const; 463 464 size_t GetSize() const; 465 466 void SetString(llvm::StringRef string); // safe, null on error 467 468 StructuredData::StringSP CreateStructuredString() const; 469 }; 470 471 class PythonInteger : public TypedPythonObject<PythonInteger> { 472 public: 473 using TypedPythonObject::TypedPythonObject; 474 475 PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason 476 477 explicit PythonInteger(int64_t value); 478 479 static bool Check(PyObject *py_obj); 480 static void Convert(PyRefType &type, PyObject *&py_obj); 481 482 int64_t GetInteger() const; 483 484 void SetInteger(int64_t value); 485 486 StructuredData::IntegerSP CreateStructuredInteger() const; 487 }; 488 489 class PythonBoolean : public TypedPythonObject<PythonBoolean> { 490 public: 491 using TypedPythonObject::TypedPythonObject; 492 493 explicit PythonBoolean(bool value); 494 495 static bool Check(PyObject *py_obj); 496 497 bool GetValue() const; 498 499 void SetValue(bool value); 500 501 StructuredData::BooleanSP CreateStructuredBoolean() const; 502 }; 503 504 class PythonList : public TypedPythonObject<PythonList> { 505 public: 506 using TypedPythonObject::TypedPythonObject; 507 508 PythonList() : TypedPythonObject() {} // MSVC requires this for some reason 509 510 explicit PythonList(PyInitialValue value); 511 explicit PythonList(int list_size); 512 513 static bool Check(PyObject *py_obj); 514 515 uint32_t GetSize() const; 516 517 PythonObject GetItemAtIndex(uint32_t index) const; 518 519 void SetItemAtIndex(uint32_t index, const PythonObject &object); 520 521 void AppendItem(const PythonObject &object); 522 523 StructuredData::ArraySP CreateStructuredArray() const; 524 }; 525 526 class PythonTuple : public TypedPythonObject<PythonTuple> { 527 public: 528 using TypedPythonObject::TypedPythonObject; 529 530 explicit PythonTuple(PyInitialValue value); 531 explicit PythonTuple(int tuple_size); 532 PythonTuple(std::initializer_list<PythonObject> objects); 533 PythonTuple(std::initializer_list<PyObject *> objects); 534 535 static bool Check(PyObject *py_obj); 536 537 uint32_t GetSize() const; 538 539 PythonObject GetItemAtIndex(uint32_t index) const; 540 541 void SetItemAtIndex(uint32_t index, const PythonObject &object); 542 543 StructuredData::ArraySP CreateStructuredArray() const; 544 }; 545 546 class PythonDictionary : public TypedPythonObject<PythonDictionary> { 547 public: 548 using TypedPythonObject::TypedPythonObject; 549 550 PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason 551 552 explicit PythonDictionary(PyInitialValue value); 553 554 static bool Check(PyObject *py_obj); 555 556 uint32_t GetSize() const; 557 558 PythonList GetKeys() const; 559 560 PythonObject GetItemForKey(const PythonObject &key) const; // DEPRECATED 561 void SetItemForKey(const PythonObject &key, 562 const PythonObject &value); // DEPRECATED 563 564 llvm::Expected<PythonObject> GetItem(const PythonObject &key) const; 565 llvm::Expected<PythonObject> GetItem(const char *key) const; 566 llvm::Error SetItem(const PythonObject &key, const PythonObject &value) const; 567 llvm::Error SetItem(const char *key, const PythonObject &value) const; 568 569 StructuredData::DictionarySP CreateStructuredDictionary() const; 570 }; 571 572 class PythonModule : public TypedPythonObject<PythonModule> { 573 public: 574 using TypedPythonObject::TypedPythonObject; 575 576 static bool Check(PyObject *py_obj); 577 578 static PythonModule BuiltinsModule(); 579 580 static PythonModule MainModule(); 581 582 static PythonModule AddModule(llvm::StringRef module); 583 584 // safe, returns invalid on error; 585 static PythonModule ImportModule(llvm::StringRef name) { 586 std::string s = name; 587 auto mod = Import(s.c_str()); 588 if (!mod) { 589 llvm::consumeError(mod.takeError()); 590 return PythonModule(); 591 } 592 return std::move(mod.get()); 593 } 594 595 static llvm::Expected<PythonModule> Import(const char *name); 596 597 llvm::Expected<PythonObject> Get(const char *name); 598 599 PythonDictionary GetDictionary() const; 600 }; 601 602 class PythonCallable : public TypedPythonObject<PythonCallable> { 603 public: 604 using TypedPythonObject::TypedPythonObject; 605 606 struct ArgInfo { 607 /* the largest number of positional arguments this callable 608 * can accept, or UNBOUNDED, ie UINT_MAX if it's a varargs 609 * function and can accept an arbitrary number */ 610 unsigned max_positional_args; 611 static constexpr unsigned UNBOUNDED = UINT_MAX; // FIXME c++17 inline 612 /* the number of positional arguments, including optional ones, 613 * and excluding varargs. If this is a bound method, then the 614 * count will still include a +1 for self. 615 * 616 * FIXME. That's crazy. This should be replaced with 617 * an accurate min and max for positional args. 618 */ 619 int count; 620 /* does the callable have positional varargs? */ 621 bool has_varargs : 1; // FIXME delete this 622 }; 623 624 static bool Check(PyObject *py_obj); 625 626 llvm::Expected<ArgInfo> GetArgInfo() const; 627 628 llvm::Expected<ArgInfo> GetInitArgInfo() const; 629 630 ArgInfo GetNumArguments() const; // DEPRECATED 631 632 // If the callable is a Py_Class, then find the number of arguments 633 // of the __init__ method. 634 ArgInfo GetNumInitArguments() const; // DEPRECATED 635 636 PythonObject operator()(); 637 638 PythonObject operator()(std::initializer_list<PyObject *> args); 639 640 PythonObject operator()(std::initializer_list<PythonObject> args); 641 642 template <typename Arg, typename... Args> 643 PythonObject operator()(const Arg &arg, Args... args) { 644 return operator()({arg, args...}); 645 } 646 }; 647 648 class PythonFile : public TypedPythonObject<PythonFile> { 649 public: 650 using TypedPythonObject::TypedPythonObject; 651 652 PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason 653 654 static bool Check(PyObject *py_obj); 655 656 static llvm::Expected<PythonFile> FromFile(File &file, 657 const char *mode = nullptr); 658 659 llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false); 660 llvm::Expected<lldb::FileSP> 661 ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false); 662 }; 663 664 class PythonException : public llvm::ErrorInfo<PythonException> { 665 private: 666 PyObject *m_exception_type, *m_exception, *m_traceback; 667 PyObject *m_repr_bytes; 668 669 public: 670 static char ID; 671 const char *toCString() const; 672 PythonException(const char *caller = nullptr); 673 void Restore(); 674 ~PythonException(); 675 void log(llvm::raw_ostream &OS) const override; 676 std::error_code convertToErrorCode() const override; 677 }; 678 679 // This extracts the underlying T out of an Expected<T> and returns it. 680 // If the Expected is an Error instead of a T, that error will be converted 681 // into a python exception, and this will return a default-constructed T. 682 // 683 // This is appropriate for use right at the boundary of python calling into 684 // C++, such as in a SWIG typemap. In such a context you should simply 685 // check if the returned T is valid, and if it is, return a NULL back 686 // to python. This will result in the Error being raised as an exception 687 // from python code's point of view. 688 // 689 // For example: 690 // ``` 691 // Expected<Foo *> efoop = some_cpp_function(); 692 // Foo *foop = unwrapOrSetPythonException(efoop); 693 // if (!foop) 694 // return NULL; 695 // do_something(*foop); 696 // 697 // If the Error returned was itself created because a python exception was 698 // raised when C++ code called into python, then the original exception 699 // will be restored. Otherwise a simple string exception will be raised. 700 template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) { 701 if (expected) 702 return expected.get(); 703 llvm::handleAllErrors( 704 expected.takeError(), [](PythonException &E) { E.Restore(); }, 705 [](const llvm::ErrorInfoBase &E) { 706 PyErr_SetString(PyExc_Exception, E.message().c_str()); 707 }); 708 return T(); 709 } 710 711 } // namespace lldb_private 712 713 #endif 714 715 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H 716