xref: /llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp (revision 0f783599a4c645d8ae826f990f7b938fac6e5dae)
1 //===-- PythonDataObjects.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifdef LLDB_DISABLE_PYTHON
10 
11 // Python is disabled in this build
12 
13 #else
14 
15 #include "PythonDataObjects.h"
16 #include "ScriptInterpreterPython.h"
17 
18 #include "lldb/Host/File.h"
19 #include "lldb/Host/FileSystem.h"
20 #include "lldb/Interpreter/ScriptInterpreter.h"
21 #include "lldb/Utility/Log.h"
22 #include "lldb/Utility/Stream.h"
23 
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ConvertUTF.h"
27 #include "llvm/Support/Errno.h"
28 
29 #include <stdio.h>
30 
31 using namespace lldb_private;
32 using namespace lldb;
33 using namespace lldb_private::python;
34 using llvm::Error;
35 using llvm::Expected;
36 
37 template <> Expected<bool> python::As<bool>(Expected<PythonObject> &&obj) {
38   if (!obj)
39     return obj.takeError();
40   return obj.get().IsTrue();
41 }
42 
43 template <>
44 Expected<long long> python::As<long long>(Expected<PythonObject> &&obj) {
45   if (!obj)
46     return obj.takeError();
47   return obj.get().AsLongLong();
48 }
49 
50 void StructuredPythonObject::Serialize(llvm::json::OStream &s) const {
51   s.value(llvm::formatv("Python Obj: {0:X}", GetValue()).str());
52 }
53 
54 // PythonObject
55 
56 void PythonObject::Dump(Stream &strm) const {
57   if (m_py_obj) {
58     FILE *file = llvm::sys::RetryAfterSignal(nullptr, ::tmpfile);
59     if (file) {
60       ::PyObject_Print(m_py_obj, file, 0);
61       const long length = ftell(file);
62       if (length) {
63         ::rewind(file);
64         std::vector<char> file_contents(length, '\0');
65         const size_t length_read =
66             ::fread(file_contents.data(), 1, file_contents.size(), file);
67         if (length_read > 0)
68           strm.Write(file_contents.data(), length_read);
69       }
70       ::fclose(file);
71     }
72   } else
73     strm.PutCString("NULL");
74 }
75 
76 PyObjectType PythonObject::GetObjectType() const {
77   if (!IsAllocated())
78     return PyObjectType::None;
79 
80   if (PythonModule::Check(m_py_obj))
81     return PyObjectType::Module;
82   if (PythonList::Check(m_py_obj))
83     return PyObjectType::List;
84   if (PythonTuple::Check(m_py_obj))
85     return PyObjectType::Tuple;
86   if (PythonDictionary::Check(m_py_obj))
87     return PyObjectType::Dictionary;
88   if (PythonString::Check(m_py_obj))
89     return PyObjectType::String;
90 #if PY_MAJOR_VERSION >= 3
91   if (PythonBytes::Check(m_py_obj))
92     return PyObjectType::Bytes;
93 #endif
94   if (PythonByteArray::Check(m_py_obj))
95     return PyObjectType::ByteArray;
96   if (PythonBoolean::Check(m_py_obj))
97     return PyObjectType::Boolean;
98   if (PythonInteger::Check(m_py_obj))
99     return PyObjectType::Integer;
100   if (PythonFile::Check(m_py_obj))
101     return PyObjectType::File;
102   if (PythonCallable::Check(m_py_obj))
103     return PyObjectType::Callable;
104   return PyObjectType::Unknown;
105 }
106 
107 PythonString PythonObject::Repr() const {
108   if (!m_py_obj)
109     return PythonString();
110   PyObject *repr = PyObject_Repr(m_py_obj);
111   if (!repr)
112     return PythonString();
113   return PythonString(PyRefType::Owned, repr);
114 }
115 
116 PythonString PythonObject::Str() const {
117   if (!m_py_obj)
118     return PythonString();
119   PyObject *str = PyObject_Str(m_py_obj);
120   if (!str)
121     return PythonString();
122   return PythonString(PyRefType::Owned, str);
123 }
124 
125 PythonObject
126 PythonObject::ResolveNameWithDictionary(llvm::StringRef name,
127                                         const PythonDictionary &dict) {
128   size_t dot_pos = name.find('.');
129   llvm::StringRef piece = name.substr(0, dot_pos);
130   PythonObject result = dict.GetItemForKey(PythonString(piece));
131   if (dot_pos == llvm::StringRef::npos) {
132     // There was no dot, we're done.
133     return result;
134   }
135 
136   // There was a dot.  The remaining portion of the name should be looked up in
137   // the context of the object that was found in the dictionary.
138   return result.ResolveName(name.substr(dot_pos + 1));
139 }
140 
141 PythonObject PythonObject::ResolveName(llvm::StringRef name) const {
142   // Resolve the name in the context of the specified object.  If, for example,
143   // `this` refers to a PyModule, then this will look for `name` in this
144   // module.  If `this` refers to a PyType, then it will resolve `name` as an
145   // attribute of that type.  If `this` refers to an instance of an object,
146   // then it will resolve `name` as the value of the specified field.
147   //
148   // This function handles dotted names so that, for example, if `m_py_obj`
149   // refers to the `sys` module, and `name` == "path.append", then it will find
150   // the function `sys.path.append`.
151 
152   size_t dot_pos = name.find('.');
153   if (dot_pos == llvm::StringRef::npos) {
154     // No dots in the name, we should be able to find the value immediately as
155     // an attribute of `m_py_obj`.
156     return GetAttributeValue(name);
157   }
158 
159   // Look up the first piece of the name, and resolve the rest as a child of
160   // that.
161   PythonObject parent = ResolveName(name.substr(0, dot_pos));
162   if (!parent.IsAllocated())
163     return PythonObject();
164 
165   // Tail recursion.. should be optimized by the compiler
166   return parent.ResolveName(name.substr(dot_pos + 1));
167 }
168 
169 bool PythonObject::HasAttribute(llvm::StringRef attr) const {
170   if (!IsValid())
171     return false;
172   PythonString py_attr(attr);
173   return !!PyObject_HasAttr(m_py_obj, py_attr.get());
174 }
175 
176 PythonObject PythonObject::GetAttributeValue(llvm::StringRef attr) const {
177   if (!IsValid())
178     return PythonObject();
179 
180   PythonString py_attr(attr);
181   if (!PyObject_HasAttr(m_py_obj, py_attr.get()))
182     return PythonObject();
183 
184   return PythonObject(PyRefType::Owned,
185                       PyObject_GetAttr(m_py_obj, py_attr.get()));
186 }
187 
188 StructuredData::ObjectSP PythonObject::CreateStructuredObject() const {
189   switch (GetObjectType()) {
190   case PyObjectType::Dictionary:
191     return PythonDictionary(PyRefType::Borrowed, m_py_obj)
192         .CreateStructuredDictionary();
193   case PyObjectType::Boolean:
194     return PythonBoolean(PyRefType::Borrowed, m_py_obj)
195         .CreateStructuredBoolean();
196   case PyObjectType::Integer:
197     return PythonInteger(PyRefType::Borrowed, m_py_obj)
198         .CreateStructuredInteger();
199   case PyObjectType::List:
200     return PythonList(PyRefType::Borrowed, m_py_obj).CreateStructuredArray();
201   case PyObjectType::String:
202     return PythonString(PyRefType::Borrowed, m_py_obj).CreateStructuredString();
203   case PyObjectType::Bytes:
204     return PythonBytes(PyRefType::Borrowed, m_py_obj).CreateStructuredString();
205   case PyObjectType::ByteArray:
206     return PythonByteArray(PyRefType::Borrowed, m_py_obj)
207         .CreateStructuredString();
208   case PyObjectType::None:
209     return StructuredData::ObjectSP();
210   default:
211     return StructuredData::ObjectSP(new StructuredPythonObject(m_py_obj));
212   }
213 }
214 
// PythonBytes
216 
217 PythonBytes::PythonBytes(llvm::ArrayRef<uint8_t> bytes) { SetBytes(bytes); }
218 
219 PythonBytes::PythonBytes(const uint8_t *bytes, size_t length) {
220   SetBytes(llvm::ArrayRef<uint8_t>(bytes, length));
221 }
222 
223 bool PythonBytes::Check(PyObject *py_obj) {
224   if (!py_obj)
225     return false;
226   return PyBytes_Check(py_obj);
227 }
228 
229 llvm::ArrayRef<uint8_t> PythonBytes::GetBytes() const {
230   if (!IsValid())
231     return llvm::ArrayRef<uint8_t>();
232 
233   Py_ssize_t size;
234   char *c;
235 
236   PyBytes_AsStringAndSize(m_py_obj, &c, &size);
237   return llvm::ArrayRef<uint8_t>(reinterpret_cast<uint8_t *>(c), size);
238 }
239 
240 size_t PythonBytes::GetSize() const {
241   if (!IsValid())
242     return 0;
243   return PyBytes_Size(m_py_obj);
244 }
245 
246 void PythonBytes::SetBytes(llvm::ArrayRef<uint8_t> bytes) {
247   const char *data = reinterpret_cast<const char *>(bytes.data());
248   PyObject *py_bytes = PyBytes_FromStringAndSize(data, bytes.size());
249   PythonObject::Reset(PyRefType::Owned, py_bytes);
250 }
251 
252 StructuredData::StringSP PythonBytes::CreateStructuredString() const {
253   StructuredData::StringSP result(new StructuredData::String);
254   Py_ssize_t size;
255   char *c;
256   PyBytes_AsStringAndSize(m_py_obj, &c, &size);
257   result->SetValue(std::string(c, size));
258   return result;
259 }
260 
261 PythonByteArray::PythonByteArray(llvm::ArrayRef<uint8_t> bytes)
262     : PythonByteArray(bytes.data(), bytes.size()) {}
263 
264 PythonByteArray::PythonByteArray(const uint8_t *bytes, size_t length) {
265   const char *str = reinterpret_cast<const char *>(bytes);
266   Reset(PyRefType::Owned, PyByteArray_FromStringAndSize(str, length));
267 }
268 
269 bool PythonByteArray::Check(PyObject *py_obj) {
270   if (!py_obj)
271     return false;
272   return PyByteArray_Check(py_obj);
273 }
274 
275 llvm::ArrayRef<uint8_t> PythonByteArray::GetBytes() const {
276   if (!IsValid())
277     return llvm::ArrayRef<uint8_t>();
278 
279   char *c = PyByteArray_AsString(m_py_obj);
280   size_t size = GetSize();
281   return llvm::ArrayRef<uint8_t>(reinterpret_cast<uint8_t *>(c), size);
282 }
283 
284 size_t PythonByteArray::GetSize() const {
285   if (!IsValid())
286     return 0;
287 
288   return PyByteArray_Size(m_py_obj);
289 }
290 
291 StructuredData::StringSP PythonByteArray::CreateStructuredString() const {
292   StructuredData::StringSP result(new StructuredData::String);
293   llvm::ArrayRef<uint8_t> bytes = GetBytes();
294   const char *str = reinterpret_cast<const char *>(bytes.data());
295   result->SetValue(std::string(str, bytes.size()));
296   return result;
297 }
298 
299 // PythonString
300 
301 Expected<PythonString> PythonString::FromUTF8(llvm::StringRef string) {
302 #if PY_MAJOR_VERSION >= 3
303   PyObject *str = PyUnicode_FromStringAndSize(string.data(), string.size());
304 #else
305   PyObject *str = PyString_FromStringAndSize(string.data(), string.size());
306 #endif
307   if (!str)
308     return llvm::make_error<PythonException>();
309   return Take<PythonString>(str);
310 }
311 
312 PythonString::PythonString(llvm::StringRef string) { SetString(string); }
313 
314 bool PythonString::Check(PyObject *py_obj) {
315   if (!py_obj)
316     return false;
317 
318   if (PyUnicode_Check(py_obj))
319     return true;
320 #if PY_MAJOR_VERSION < 3
321   if (PyString_Check(py_obj))
322     return true;
323 #endif
324   return false;
325 }
326 
327 void PythonString::Convert(PyRefType &type, PyObject *&py_obj) {
328 #if PY_MAJOR_VERSION < 3
329   // In Python 2, Don't store PyUnicode objects directly, because we need
330   // access to their underlying character buffers which Python 2 doesn't
331   // provide.
332   if (PyUnicode_Check(py_obj)) {
333     PyObject *s = PyUnicode_AsUTF8String(py_obj);
334     if (s == nullptr) {
335       PyErr_Clear();
336       if (type == PyRefType::Owned)
337         Py_DECREF(py_obj);
338       return;
339     }
340     if (type == PyRefType::Owned)
341       Py_DECREF(py_obj);
342     else
343       type = PyRefType::Owned;
344     py_obj = s;
345   }
346 #endif
347 }
348 
349 llvm::StringRef PythonString::GetString() const {
350   auto s = AsUTF8();
351   if (!s) {
352     llvm::consumeError(s.takeError());
353     return llvm::StringRef("");
354   }
355   return s.get();
356 }
357 
358 Expected<llvm::StringRef> PythonString::AsUTF8() const {
359   if (!IsValid())
360     return nullDeref();
361 
362   Py_ssize_t size;
363   const char *data;
364 
365 #if PY_MAJOR_VERSION >= 3
366   data = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
367 #else
368   char *c = NULL;
369   int r = PyString_AsStringAndSize(m_py_obj, &c, &size);
370   if (r < 0)
371     c = NULL;
372   data = c;
373 #endif
374 
375   if (!data)
376     return exception();
377 
378   return llvm::StringRef(data, size);
379 }
380 
381 size_t PythonString::GetSize() const {
382   if (IsValid()) {
383 #if PY_MAJOR_VERSION >= 3
384     return PyUnicode_GetSize(m_py_obj);
385 #else
386     return PyString_Size(m_py_obj);
387 #endif
388   }
389   return 0;
390 }
391 
392 void PythonString::SetString(llvm::StringRef string) {
393   auto s = FromUTF8(string);
394   if (!s) {
395     llvm::consumeError(s.takeError());
396     Reset();
397   } else {
398     PythonObject::Reset(std::move(s.get()));
399   }
400 }
401 
402 StructuredData::StringSP PythonString::CreateStructuredString() const {
403   StructuredData::StringSP result(new StructuredData::String);
404   result->SetValue(GetString());
405   return result;
406 }
407 
408 // PythonInteger
409 
410 PythonInteger::PythonInteger(int64_t value) { SetInteger(value); }
411 
412 bool PythonInteger::Check(PyObject *py_obj) {
413   if (!py_obj)
414     return false;
415 
416 #if PY_MAJOR_VERSION >= 3
417   // Python 3 does not have PyInt_Check.  There is only one type of integral
418   // value, long.
419   return PyLong_Check(py_obj);
420 #else
421   return PyLong_Check(py_obj) || PyInt_Check(py_obj);
422 #endif
423 }
424 
425 void PythonInteger::Convert(PyRefType &type, PyObject *&py_obj) {
426 #if PY_MAJOR_VERSION < 3
427   // Always store this as a PyLong, which makes interoperability between Python
428   // 2.x and Python 3.x easier.  This is only necessary in 2.x, since 3.x
429   // doesn't even have a PyInt.
430   if (PyInt_Check(py_obj)) {
431     // Since we converted the original object to a different type, the new
432     // object is an owned object regardless of the ownership semantics
433     // requested by the user.
434     long long value = PyInt_AsLong(py_obj);
435     PyObject *l = nullptr;
436     if (!PyErr_Occurred())
437       l = PyLong_FromLongLong(value);
438     if (l == nullptr) {
439       PyErr_Clear();
440       if (type == PyRefType::Owned)
441         Py_DECREF(py_obj);
442       return;
443     }
444     if (type == PyRefType::Owned)
445       Py_DECREF(py_obj);
446     else
447       type = PyRefType::Owned;
448     py_obj = l;
449   }
450 #endif
451 }
452 
453 int64_t PythonInteger::GetInteger() const {
454   if (m_py_obj) {
455     assert(PyLong_Check(m_py_obj) &&
456            "PythonInteger::GetInteger has a PyObject that isn't a PyLong");
457 
458     int overflow = 0;
459     int64_t result = PyLong_AsLongLongAndOverflow(m_py_obj, &overflow);
460     if (overflow != 0) {
461       // We got an integer that overflows, like 18446744072853913392L we can't
462       // use PyLong_AsLongLong() as it will return 0xffffffffffffffff. If we
463       // use the unsigned long long it will work as expected.
464       const uint64_t uval = PyLong_AsUnsignedLongLong(m_py_obj);
465       result = static_cast<int64_t>(uval);
466     }
467     return result;
468   }
469   return UINT64_MAX;
470 }
471 
472 void PythonInteger::SetInteger(int64_t value) {
473   PythonObject::Reset(PyRefType::Owned, PyLong_FromLongLong(value));
474 }
475 
476 StructuredData::IntegerSP PythonInteger::CreateStructuredInteger() const {
477   StructuredData::IntegerSP result(new StructuredData::Integer);
478   result->SetValue(GetInteger());
479   return result;
480 }
481 
482 // PythonBoolean
483 
484 PythonBoolean::PythonBoolean(bool value) {
485   SetValue(value);
486 }
487 
488 bool PythonBoolean::Check(PyObject *py_obj) {
489   return py_obj ? PyBool_Check(py_obj) : false;
490 }
491 
492 bool PythonBoolean::GetValue() const {
493   return m_py_obj ? PyObject_IsTrue(m_py_obj) : false;
494 }
495 
496 void PythonBoolean::SetValue(bool value) {
497   PythonObject::Reset(PyRefType::Owned, PyBool_FromLong(value));
498 }
499 
500 StructuredData::BooleanSP PythonBoolean::CreateStructuredBoolean() const {
501   StructuredData::BooleanSP result(new StructuredData::Boolean);
502   result->SetValue(GetValue());
503   return result;
504 }
505 
506 // PythonList
507 
508 PythonList::PythonList(PyInitialValue value) {
509   if (value == PyInitialValue::Empty)
510     Reset(PyRefType::Owned, PyList_New(0));
511 }
512 
513 PythonList::PythonList(int list_size) {
514   Reset(PyRefType::Owned, PyList_New(list_size));
515 }
516 
517 bool PythonList::Check(PyObject *py_obj) {
518   if (!py_obj)
519     return false;
520   return PyList_Check(py_obj);
521 }
522 
523 uint32_t PythonList::GetSize() const {
524   if (IsValid())
525     return PyList_GET_SIZE(m_py_obj);
526   return 0;
527 }
528 
529 PythonObject PythonList::GetItemAtIndex(uint32_t index) const {
530   if (IsValid())
531     return PythonObject(PyRefType::Borrowed, PyList_GetItem(m_py_obj, index));
532   return PythonObject();
533 }
534 
535 void PythonList::SetItemAtIndex(uint32_t index, const PythonObject &object) {
536   if (IsAllocated() && object.IsValid()) {
537     // PyList_SetItem is documented to "steal" a reference, so we need to
538     // convert it to an owned reference by incrementing it.
539     Py_INCREF(object.get());
540     PyList_SetItem(m_py_obj, index, object.get());
541   }
542 }
543 
544 void PythonList::AppendItem(const PythonObject &object) {
545   if (IsAllocated() && object.IsValid()) {
546     // `PyList_Append` does *not* steal a reference, so do not call `Py_INCREF`
547     // here like we do with `PyList_SetItem`.
548     PyList_Append(m_py_obj, object.get());
549   }
550 }
551 
552 StructuredData::ArraySP PythonList::CreateStructuredArray() const {
553   StructuredData::ArraySP result(new StructuredData::Array);
554   uint32_t count = GetSize();
555   for (uint32_t i = 0; i < count; ++i) {
556     PythonObject obj = GetItemAtIndex(i);
557     result->AddItem(obj.CreateStructuredObject());
558   }
559   return result;
560 }
561 
562 // PythonTuple
563 
564 PythonTuple::PythonTuple(PyInitialValue value) {
565   if (value == PyInitialValue::Empty)
566     Reset(PyRefType::Owned, PyTuple_New(0));
567 }
568 
569 PythonTuple::PythonTuple(int tuple_size) {
570   Reset(PyRefType::Owned, PyTuple_New(tuple_size));
571 }
572 
573 PythonTuple::PythonTuple(std::initializer_list<PythonObject> objects) {
574   m_py_obj = PyTuple_New(objects.size());
575 
576   uint32_t idx = 0;
577   for (auto object : objects) {
578     if (object.IsValid())
579       SetItemAtIndex(idx, object);
580     idx++;
581   }
582 }
583 
584 PythonTuple::PythonTuple(std::initializer_list<PyObject *> objects) {
585   m_py_obj = PyTuple_New(objects.size());
586 
587   uint32_t idx = 0;
588   for (auto py_object : objects) {
589     PythonObject object(PyRefType::Borrowed, py_object);
590     if (object.IsValid())
591       SetItemAtIndex(idx, object);
592     idx++;
593   }
594 }
595 
596 bool PythonTuple::Check(PyObject *py_obj) {
597   if (!py_obj)
598     return false;
599   return PyTuple_Check(py_obj);
600 }
601 
602 uint32_t PythonTuple::GetSize() const {
603   if (IsValid())
604     return PyTuple_GET_SIZE(m_py_obj);
605   return 0;
606 }
607 
608 PythonObject PythonTuple::GetItemAtIndex(uint32_t index) const {
609   if (IsValid())
610     return PythonObject(PyRefType::Borrowed, PyTuple_GetItem(m_py_obj, index));
611   return PythonObject();
612 }
613 
614 void PythonTuple::SetItemAtIndex(uint32_t index, const PythonObject &object) {
615   if (IsAllocated() && object.IsValid()) {
616     // PyTuple_SetItem is documented to "steal" a reference, so we need to
617     // convert it to an owned reference by incrementing it.
618     Py_INCREF(object.get());
619     PyTuple_SetItem(m_py_obj, index, object.get());
620   }
621 }
622 
623 StructuredData::ArraySP PythonTuple::CreateStructuredArray() const {
624   StructuredData::ArraySP result(new StructuredData::Array);
625   uint32_t count = GetSize();
626   for (uint32_t i = 0; i < count; ++i) {
627     PythonObject obj = GetItemAtIndex(i);
628     result->AddItem(obj.CreateStructuredObject());
629   }
630   return result;
631 }
632 
633 // PythonDictionary
634 
635 PythonDictionary::PythonDictionary(PyInitialValue value) {
636   if (value == PyInitialValue::Empty)
637     Reset(PyRefType::Owned, PyDict_New());
638 }
639 
640 bool PythonDictionary::Check(PyObject *py_obj) {
641   if (!py_obj)
642     return false;
643 
644   return PyDict_Check(py_obj);
645 }
646 
647 uint32_t PythonDictionary::GetSize() const {
648   if (IsValid())
649     return PyDict_Size(m_py_obj);
650   return 0;
651 }
652 
653 PythonList PythonDictionary::GetKeys() const {
654   if (IsValid())
655     return PythonList(PyRefType::Owned, PyDict_Keys(m_py_obj));
656   return PythonList(PyInitialValue::Invalid);
657 }
658 
659 PythonObject PythonDictionary::GetItemForKey(const PythonObject &key) const {
660   if (IsAllocated() && key.IsValid())
661     return PythonObject(PyRefType::Borrowed,
662                         PyDict_GetItem(m_py_obj, key.get()));
663   return PythonObject();
664 }
665 
666 void PythonDictionary::SetItemForKey(const PythonObject &key,
667                                      const PythonObject &value) {
668   if (IsAllocated() && key.IsValid() && value.IsValid())
669     PyDict_SetItem(m_py_obj, key.get(), value.get());
670 }
671 
672 StructuredData::DictionarySP
673 PythonDictionary::CreateStructuredDictionary() const {
674   StructuredData::DictionarySP result(new StructuredData::Dictionary);
675   PythonList keys(GetKeys());
676   uint32_t num_keys = keys.GetSize();
677   for (uint32_t i = 0; i < num_keys; ++i) {
678     PythonObject key = keys.GetItemAtIndex(i);
679     PythonObject value = GetItemForKey(key);
680     StructuredData::ObjectSP structured_value = value.CreateStructuredObject();
681     result->AddItem(key.Str().GetString(), structured_value);
682   }
683   return result;
684 }
685 
686 PythonModule PythonModule::BuiltinsModule() {
687 #if PY_MAJOR_VERSION >= 3
688   return AddModule("builtins");
689 #else
690   return AddModule("__builtin__");
691 #endif
692 }
693 
694 PythonModule PythonModule::MainModule() { return AddModule("__main__"); }
695 
696 PythonModule PythonModule::AddModule(llvm::StringRef module) {
697   std::string str = module.str();
698   return PythonModule(PyRefType::Borrowed, PyImport_AddModule(str.c_str()));
699 }
700 
701 Expected<PythonModule> PythonModule::Import(const char *name) {
702   PyObject *mod = PyImport_ImportModule(name);
703   if (!mod)
704     return exception();
705   return Take<PythonModule>(mod);
706 }
707 
708 Expected<PythonObject> PythonModule::Get(const char *name) {
709   if (!IsValid())
710     return nullDeref();
711   PyObject *dict = PyModule_GetDict(m_py_obj);
712   if (!dict)
713     return exception();
714   PyObject *item = PyDict_GetItemString(dict, name);
715   if (!item)
716     return exception();
717   return Retain<PythonObject>(item);
718 }
719 
720 bool PythonModule::Check(PyObject *py_obj) {
721   if (!py_obj)
722     return false;
723 
724   return PyModule_Check(py_obj);
725 }
726 
727 PythonDictionary PythonModule::GetDictionary() const {
728   return PythonDictionary(PyRefType::Borrowed, PyModule_GetDict(m_py_obj));
729 }
730 
731 bool PythonCallable::Check(PyObject *py_obj) {
732   if (!py_obj)
733     return false;
734 
735   return PyCallable_Check(py_obj);
736 }
737 
738 PythonCallable::ArgInfo PythonCallable::GetNumInitArguments() const {
739   ArgInfo result = {0, false, false, false};
740   if (!IsValid())
741     return result;
742 
743   PythonObject __init__ = GetAttributeValue("__init__");
744   if (__init__.IsValid() ) {
745     auto __init_callable__ = __init__.AsType<PythonCallable>();
746     if (__init_callable__.IsValid())
747       return __init_callable__.GetNumArguments();
748   }
749   return result;
750 }
751 
752 PythonCallable::ArgInfo PythonCallable::GetNumArguments() const {
753   ArgInfo result = {0, false, false, false};
754   if (!IsValid())
755     return result;
756 
757   PyObject *py_func_obj = m_py_obj;
758   if (PyMethod_Check(py_func_obj)) {
759     py_func_obj = PyMethod_GET_FUNCTION(py_func_obj);
760     PythonObject im_self = GetAttributeValue("im_self");
761     if (im_self.IsValid() && !im_self.IsNone())
762       result.is_bound_method = true;
763   } else {
764     // see if this is a callable object with an __call__ method
765     if (!PyFunction_Check(py_func_obj)) {
766       PythonObject __call__ = GetAttributeValue("__call__");
767       if (__call__.IsValid()) {
768         auto __callable__ = __call__.AsType<PythonCallable>();
769         if (__callable__.IsValid()) {
770           py_func_obj = PyMethod_GET_FUNCTION(__callable__.get());
771           PythonObject im_self = GetAttributeValue("im_self");
772           if (im_self.IsValid() && !im_self.IsNone())
773             result.is_bound_method = true;
774         }
775       }
776     }
777   }
778 
779   if (!py_func_obj)
780     return result;
781 
782   PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(py_func_obj);
783   if (!code)
784     return result;
785 
786   result.count = code->co_argcount;
787   result.has_varargs = !!(code->co_flags & CO_VARARGS);
788   result.has_kwargs = !!(code->co_flags & CO_VARKEYWORDS);
789   return result;
790 }
791 
792 PythonObject PythonCallable::operator()() {
793   return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, nullptr));
794 }
795 
796 PythonObject PythonCallable::
797 operator()(std::initializer_list<PyObject *> args) {
798   PythonTuple arg_tuple(args);
799   return PythonObject(PyRefType::Owned,
800                       PyObject_CallObject(m_py_obj, arg_tuple.get()));
801 }
802 
803 PythonObject PythonCallable::
804 operator()(std::initializer_list<PythonObject> args) {
805   PythonTuple arg_tuple(args);
806   return PythonObject(PyRefType::Owned,
807                       PyObject_CallObject(m_py_obj, arg_tuple.get()));
808 }
809 
810 bool PythonFile::Check(PyObject *py_obj) {
811   if (!py_obj)
812     return false;
813 #if PY_MAJOR_VERSION < 3
814   return PyFile_Check(py_obj);
815 #else
816   // In Python 3, there is no `PyFile_Check`, and in fact PyFile is not even a
817   // first-class object type anymore.  `PyFile_FromFd` is just a thin wrapper
818   // over `io.open()`, which returns some object derived from `io.IOBase`. As a
819   // result, the only way to detect a file in Python 3 is to check whether it
820   // inherits from `io.IOBase`.
821   auto io_module = PythonModule::Import("io");
822   if (!io_module) {
823     llvm::consumeError(io_module.takeError());
824     return false;
825   }
826   auto iobase = io_module.get().Get("IOBase");
827   if (!iobase) {
828     llvm::consumeError(iobase.takeError());
829     return false;
830   }
831   int r = PyObject_IsInstance(py_obj, iobase.get().get());
832   if (r < 0) {
833     llvm::consumeError(exception()); // clear the exception and log it.
834     return false;
835   }
836   return !!r;
837 #endif
838 }
839 
840 namespace {
841 class GIL {
842 public:
843   GIL() {
844     m_state = PyGILState_Ensure();
845     assert(!PyErr_Occurred());
846   }
847   ~GIL() { PyGILState_Release(m_state); }
848 
849 protected:
850   PyGILState_STATE m_state;
851 };
852 } // namespace
853 
854 const char *PythonException::toCString() const {
855   if (!m_repr_bytes)
856     return "unknown exception";
857   return PyBytes_AS_STRING(m_repr_bytes);
858 }
859 
860 PythonException::PythonException(const char *caller) {
861   assert(PyErr_Occurred());
862   m_exception_type = m_exception = m_traceback = m_repr_bytes = NULL;
863   PyErr_Fetch(&m_exception_type, &m_exception, &m_traceback);
864   PyErr_NormalizeException(&m_exception_type, &m_exception, &m_traceback);
865   PyErr_Clear();
866   if (m_exception) {
867     PyObject *repr = PyObject_Repr(m_exception);
868     if (repr) {
869       m_repr_bytes = PyUnicode_AsEncodedString(repr, "utf-8", nullptr);
870       if (!m_repr_bytes) {
871         PyErr_Clear();
872       }
873       Py_XDECREF(repr);
874     } else {
875       PyErr_Clear();
876     }
877   }
878   Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_SCRIPT);
879   if (caller)
880     LLDB_LOGF(log, "%s failed with exception: %s", caller, toCString());
881   else
882     LLDB_LOGF(log, "python exception: %s", toCString());
883 }
884 void PythonException::Restore() {
885   if (m_exception_type && m_exception) {
886     PyErr_Restore(m_exception_type, m_exception, m_traceback);
887   } else {
888     PyErr_SetString(PyExc_Exception, toCString());
889   }
890   m_exception_type = m_exception = m_traceback = NULL;
891 }
892 
893 PythonException::~PythonException() {
894   Py_XDECREF(m_exception_type);
895   Py_XDECREF(m_exception);
896   Py_XDECREF(m_traceback);
897   Py_XDECREF(m_repr_bytes);
898 }
899 
900 void PythonException::log(llvm::raw_ostream &OS) const { OS << toCString(); }
901 
902 std::error_code PythonException::convertToErrorCode() const {
903   return llvm::inconvertibleErrorCode();
904 }
905 
906 char PythonException::ID = 0;
907 
908 llvm::Expected<File::OpenOptions>
909 GetOptionsForPyObject(const PythonObject &obj) {
910 #if PY_MAJOR_VERSION >= 3
911   auto options = File::OpenOptions(0);
912   auto readable = As<bool>(obj.CallMethod("readable"));
913   if (!readable)
914     return readable.takeError();
915   auto writable = As<bool>(obj.CallMethod("writable"));
916   if (!writable)
917     return writable.takeError();
918   if (readable.get())
919     options |= File::eOpenOptionRead;
920   if (writable.get())
921     options |= File::eOpenOptionWrite;
922   return options;
923 #else
924   PythonString py_mode = obj.GetAttributeValue("mode").AsType<PythonString>();
925   return File::GetOptionsFromMode(py_mode.GetString());
926 #endif
927 }
928 
929 // Base class template for python files.   All it knows how to do
930 // is hold a reference to the python object and close or flush it
931 // when the File is closed.
932 namespace {
933 template <typename Base> class OwnedPythonFile : public Base {
934 public:
935   template <typename... Args>
936   OwnedPythonFile(const PythonFile &file, bool borrowed, Args... args)
937       : Base(args...), m_py_obj(file), m_borrowed(borrowed) {
938     assert(m_py_obj);
939   }
940 
941   ~OwnedPythonFile() override {
942     assert(m_py_obj);
943     GIL takeGIL;
944     Close();
945     m_py_obj.Reset();
946   }
947 
948   bool IsPythonSideValid() const {
949     GIL takeGIL;
950     auto closed = As<bool>(m_py_obj.GetAttribute("closed"));
951     if (!closed) {
952       llvm::consumeError(closed.takeError());
953       return false;
954     }
955     return !closed.get();
956   }
957 
958   bool IsValid() const override {
959     return IsPythonSideValid() && Base::IsValid();
960   }
961 
962   Status Close() override {
963     assert(m_py_obj);
964     Status py_error, base_error;
965     GIL takeGIL;
966     if (!m_borrowed) {
967       auto r = m_py_obj.CallMethod("close");
968       if (!r)
969         py_error = Status(r.takeError());
970     }
971     base_error = Base::Close();
972     if (py_error.Fail())
973       return py_error;
974     return base_error;
975   };
976 
977   PyObject *GetPythonObject() const {
978     assert(m_py_obj.IsValid());
979     return m_py_obj.get();
980   }
981 
982   static bool classof(const File *file) = delete;
983 
984 protected:
985   PythonFile m_py_obj;
986   bool m_borrowed;
987 };
988 } // namespace
989 
990 // A SimplePythonFile is a OwnedPythonFile that just does all I/O as
991 // a NativeFile
992 namespace {
993 class SimplePythonFile : public OwnedPythonFile<NativeFile> {
994 public:
995   SimplePythonFile(const PythonFile &file, bool borrowed, int fd,
996                    File::OpenOptions options)
997       : OwnedPythonFile(file, borrowed, fd, options, false) {}
998 
999   static char ID;
1000   bool isA(const void *classID) const override {
1001     return classID == &ID || NativeFile::isA(classID);
1002   }
1003   static bool classof(const File *file) { return file->isA(&ID); }
1004 };
1005 char SimplePythonFile::ID = 0;
1006 } // namespace
1007 
1008 #if PY_MAJOR_VERSION >= 3
1009 
1010 namespace {
1011 class PythonBuffer {
1012 public:
1013   PythonBuffer &operator=(const PythonBuffer &) = delete;
1014   PythonBuffer(const PythonBuffer &) = delete;
1015 
1016   static Expected<PythonBuffer> Create(PythonObject &obj,
1017                                        int flags = PyBUF_SIMPLE) {
1018     Py_buffer py_buffer = {};
1019     PyObject_GetBuffer(obj.get(), &py_buffer, flags);
1020     if (!py_buffer.obj)
1021       return llvm::make_error<PythonException>();
1022     return PythonBuffer(py_buffer);
1023   }
1024 
1025   PythonBuffer(PythonBuffer &&other) {
1026     m_buffer = other.m_buffer;
1027     other.m_buffer.obj = nullptr;
1028   }
1029 
1030   ~PythonBuffer() {
1031     if (m_buffer.obj)
1032       PyBuffer_Release(&m_buffer);
1033   }
1034 
1035   Py_buffer &get() { return m_buffer; }
1036 
1037 private:
1038   // takes ownership of the buffer.
1039   PythonBuffer(const Py_buffer &py_buffer) : m_buffer(py_buffer) {}
1040   Py_buffer m_buffer;
1041 };
1042 } // namespace
1043 
1044 // Shared methods between TextPythonFile and BinaryPythonFile
1045 namespace {
1046 class PythonIOFile : public OwnedPythonFile<File> {
1047 public:
1048   PythonIOFile(const PythonFile &file, bool borrowed)
1049       : OwnedPythonFile(file, borrowed) {}
1050 
1051   ~PythonIOFile() override { Close(); }
1052 
1053   bool IsValid() const override { return IsPythonSideValid(); }
1054 
1055   Status Close() override {
1056     assert(m_py_obj);
1057     GIL takeGIL;
1058     if (m_borrowed)
1059       return Flush();
1060     auto r = m_py_obj.CallMethod("close");
1061     if (!r)
1062       return Status(r.takeError());
1063     return Status();
1064   }
1065 
1066   Status Flush() override {
1067     GIL takeGIL;
1068     auto r = m_py_obj.CallMethod("flush");
1069     if (!r)
1070       return Status(r.takeError());
1071     return Status();
1072   }
1073 
1074   Expected<File::OpenOptions> GetOptions() const override {
1075     GIL takeGIL;
1076     return GetOptionsForPyObject(m_py_obj);
1077   }
1078 
1079   static char ID;
1080   bool isA(const void *classID) const override {
1081     return classID == &ID || File::isA(classID);
1082   }
1083   static bool classof(const File *file) { return file->isA(&ID); }
1084 };
1085 char PythonIOFile::ID = 0;
1086 } // namespace
1087 
1088 namespace {
1089 class BinaryPythonFile : public PythonIOFile {
1090 protected:
1091   int m_descriptor;
1092 
1093 public:
1094   BinaryPythonFile(int fd, const PythonFile &file, bool borrowed)
1095       : PythonIOFile(file, borrowed),
1096         m_descriptor(File::DescriptorIsValid(fd) ? fd
1097                                                  : File::kInvalidDescriptor) {}
1098 
1099   int GetDescriptor() const override { return m_descriptor; }
1100 
1101   Status Write(const void *buf, size_t &num_bytes) override {
1102     GIL takeGIL;
1103     PyObject *pybuffer_p = PyMemoryView_FromMemory(
1104         const_cast<char *>((const char *)buf), num_bytes, PyBUF_READ);
1105     if (!pybuffer_p)
1106       return Status(llvm::make_error<PythonException>());
1107     auto pybuffer = Take<PythonObject>(pybuffer_p);
1108     num_bytes = 0;
1109     auto bytes_written = As<long long>(m_py_obj.CallMethod("write", pybuffer));
1110     if (!bytes_written)
1111       return Status(bytes_written.takeError());
1112     if (bytes_written.get() < 0)
1113       return Status(".write() method returned a negative number!");
1114     static_assert(sizeof(long long) >= sizeof(size_t), "overflow");
1115     num_bytes = bytes_written.get();
1116     return Status();
1117   }
1118 
1119   Status Read(void *buf, size_t &num_bytes) override {
1120     GIL takeGIL;
1121     static_assert(sizeof(long long) >= sizeof(size_t), "overflow");
1122     auto pybuffer_obj =
1123         m_py_obj.CallMethod("read", (unsigned long long)num_bytes);
1124     if (!pybuffer_obj)
1125       return Status(pybuffer_obj.takeError());
1126     num_bytes = 0;
1127     if (pybuffer_obj.get().IsNone()) {
1128       // EOF
1129       num_bytes = 0;
1130       return Status();
1131     }
1132     auto pybuffer = PythonBuffer::Create(pybuffer_obj.get());
1133     if (!pybuffer)
1134       return Status(pybuffer.takeError());
1135     memcpy(buf, pybuffer.get().get().buf, pybuffer.get().get().len);
1136     num_bytes = pybuffer.get().get().len;
1137     return Status();
1138   }
1139 };
1140 } // namespace
1141 
1142 namespace {
1143 class TextPythonFile : public PythonIOFile {
1144 protected:
1145   int m_descriptor;
1146 
1147 public:
1148   TextPythonFile(int fd, const PythonFile &file, bool borrowed)
1149       : PythonIOFile(file, borrowed),
1150         m_descriptor(File::DescriptorIsValid(fd) ? fd
1151                                                  : File::kInvalidDescriptor) {}
1152 
1153   int GetDescriptor() const override { return m_descriptor; }
1154 
1155   Status Write(const void *buf, size_t &num_bytes) override {
1156     GIL takeGIL;
1157     auto pystring =
1158         PythonString::FromUTF8(llvm::StringRef((const char *)buf, num_bytes));
1159     if (!pystring)
1160       return Status(pystring.takeError());
1161     num_bytes = 0;
1162     auto bytes_written =
1163         As<long long>(m_py_obj.CallMethod("write", pystring.get()));
1164     if (!bytes_written)
1165       return Status(bytes_written.takeError());
1166     if (bytes_written.get() < 0)
1167       return Status(".write() method returned a negative number!");
1168     static_assert(sizeof(long long) >= sizeof(size_t), "overflow");
1169     num_bytes = bytes_written.get();
1170     return Status();
1171   }
1172 
1173   Status Read(void *buf, size_t &num_bytes) override {
1174     GIL takeGIL;
1175     size_t num_chars = num_bytes / 6;
1176     size_t orig_num_bytes = num_bytes;
1177     num_bytes = 0;
1178     if (orig_num_bytes < 6) {
1179       return Status("can't read less than 6 bytes from a utf8 text stream");
1180     }
1181     auto pystring = As<PythonString>(
1182         m_py_obj.CallMethod("read", (unsigned long long)num_chars));
1183     if (!pystring)
1184       return Status(pystring.takeError());
1185     if (pystring.get().IsNone()) {
1186       // EOF
1187       return Status();
1188     }
1189     auto stringref = pystring.get().AsUTF8();
1190     if (!stringref)
1191       return Status(stringref.takeError());
1192     num_bytes = stringref.get().size();
1193     memcpy(buf, stringref.get().begin(), num_bytes);
1194     return Status();
1195   }
1196 };
1197 } // namespace
1198 
1199 #endif
1200 
1201 llvm::Expected<FileSP> PythonFile::ConvertToFile(bool borrowed) {
1202   if (!IsValid())
1203     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1204                                    "invalid PythonFile");
1205 
1206   int fd = PyObject_AsFileDescriptor(m_py_obj);
1207   if (fd < 0) {
1208     PyErr_Clear();
1209     return ConvertToFileForcingUseOfScriptingIOMethods(borrowed);
1210   }
1211   auto options = GetOptionsForPyObject(*this);
1212   if (!options)
1213     return options.takeError();
1214 
1215   // LLDB and python will not share I/O buffers.  We should probably
1216   // flush the python buffers now.
1217   auto r = CallMethod("flush");
1218   if (!r)
1219     return r.takeError();
1220 
1221   FileSP file_sp;
1222   if (borrowed) {
1223     // In this case we we don't need to retain the python
1224     // object at all.
1225     file_sp = std::make_shared<NativeFile>(fd, options.get(), false);
1226   } else {
1227     file_sp = std::static_pointer_cast<File>(
1228         std::make_shared<SimplePythonFile>(*this, borrowed, fd, options.get()));
1229   }
1230   if (!file_sp->IsValid())
1231     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1232                                    "invalid File");
1233 
1234   return file_sp;
1235 }
1236 
1237 llvm::Expected<FileSP>
1238 PythonFile::ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed) {
1239 
1240   assert(!PyErr_Occurred());
1241 
1242   if (!IsValid())
1243     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1244                                    "invalid PythonFile");
1245 
1246 #if PY_MAJOR_VERSION < 3
1247 
1248   return llvm::createStringError(llvm::inconvertibleErrorCode(),
1249                                  "not supported on python 2");
1250 
1251 #else
1252 
1253   int fd = PyObject_AsFileDescriptor(m_py_obj);
1254   if (fd < 0) {
1255     PyErr_Clear();
1256     fd = File::kInvalidDescriptor;
1257   }
1258 
1259   auto io_module = PythonModule::Import("io");
1260   if (!io_module)
1261     return io_module.takeError();
1262   auto textIOBase = io_module.get().Get("TextIOBase");
1263   if (!textIOBase)
1264     return textIOBase.takeError();
1265   auto rawIOBase = io_module.get().Get("RawIOBase");
1266   if (!rawIOBase)
1267     return rawIOBase.takeError();
1268   auto bufferedIOBase = io_module.get().Get("BufferedIOBase");
1269   if (!bufferedIOBase)
1270     return bufferedIOBase.takeError();
1271 
1272   FileSP file_sp;
1273 
1274   auto isTextIO = IsInstance(textIOBase.get());
1275   if (!isTextIO)
1276     return isTextIO.takeError();
1277   if (isTextIO.get())
1278     file_sp = std::static_pointer_cast<File>(
1279         std::make_shared<TextPythonFile>(fd, *this, borrowed));
1280 
1281   auto isRawIO = IsInstance(rawIOBase.get());
1282   if (!isRawIO)
1283     return isRawIO.takeError();
1284   auto isBufferedIO = IsInstance(bufferedIOBase.get());
1285   if (!isBufferedIO)
1286     return isBufferedIO.takeError();
1287 
1288   if (isRawIO.get() || isBufferedIO.get()) {
1289     file_sp = std::static_pointer_cast<File>(
1290         std::make_shared<BinaryPythonFile>(fd, *this, borrowed));
1291   }
1292 
1293   if (!file_sp)
1294     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1295                                    "python file is neither text nor binary");
1296 
1297   if (!file_sp->IsValid())
1298     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1299                                    "invalid File");
1300 
1301   return file_sp;
1302 
1303 #endif
1304 }
1305 
1306 Expected<PythonFile> PythonFile::FromFile(File &file, const char *mode) {
1307   if (!file.IsValid())
1308     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1309                                    "invalid file");
1310 
1311   if (auto *simple = llvm::dyn_cast<SimplePythonFile>(&file))
1312     return Retain<PythonFile>(simple->GetPythonObject());
1313 #if PY_MAJOR_VERSION >= 3
1314   if (auto *pythonio = llvm::dyn_cast<PythonIOFile>(&file))
1315     return Retain<PythonFile>(pythonio->GetPythonObject());
1316 #endif
1317 
1318   if (!mode) {
1319     auto m = file.GetOpenMode();
1320     if (!m)
1321       return m.takeError();
1322     mode = m.get();
1323   }
1324 
1325   PyObject *file_obj;
1326 #if PY_MAJOR_VERSION >= 3
1327   file_obj = PyFile_FromFd(file.GetDescriptor(), nullptr, mode, -1, nullptr,
1328                            "ignore", nullptr, 0);
1329 #else
1330   // Read through the Python source, doesn't seem to modify these strings
1331   char *cmode = const_cast<char *>(mode);
1332   // We pass ::flush instead of ::fclose here so we borrow the FILE* --
1333   // the lldb_private::File still owns it.
1334   file_obj =
1335       PyFile_FromFile(file.GetStream(), const_cast<char *>(""), cmode, ::fflush);
1336 #endif
1337 
1338   if (!file_obj)
1339     return exception();
1340 
1341   return Take<PythonFile>(file_obj);
1342 }
1343 
1344 #endif
1345