xref: /llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp (revision d3bd5b3d71ae9fc3a3a45e05d5dba6b1ecbcb2f5)
1 //===-- PythonDataObjects.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifdef LLDB_DISABLE_PYTHON
10 
11 // Python is disabled in this build
12 
13 #else
14 
15 #include "PythonDataObjects.h"
16 #include "ScriptInterpreterPython.h"
17 
18 #include "lldb/Host/File.h"
19 #include "lldb/Host/FileSystem.h"
20 #include "lldb/Interpreter/ScriptInterpreter.h"
21 #include "lldb/Utility/Log.h"
22 #include "lldb/Utility/Stream.h"
23 
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ConvertUTF.h"
27 #include "llvm/Support/Errno.h"
28 
29 #include <stdio.h>
30 
31 using namespace lldb_private;
32 using namespace lldb;
33 using namespace lldb_private::python;
34 using llvm::Error;
35 using llvm::Expected;
36 
37 template <> Expected<bool> python::As<bool>(Expected<PythonObject> &&obj) {
38   if (!obj)
39     return obj.takeError();
40   return obj.get().IsTrue();
41 }
42 
43 template <>
44 Expected<long long> python::As<long long>(Expected<PythonObject> &&obj) {
45   if (!obj)
46     return obj.takeError();
47   return obj.get().AsLongLong();
48 }
49 
50 void StructuredPythonObject::Serialize(llvm::json::OStream &s) const {
51   s.value(llvm::formatv("Python Obj: {0:X}", GetValue()).str());
52 }
53 
54 // PythonObject
55 
56 void PythonObject::Dump(Stream &strm) const {
57   if (m_py_obj) {
58     FILE *file = llvm::sys::RetryAfterSignal(nullptr, ::tmpfile);
59     if (file) {
60       ::PyObject_Print(m_py_obj, file, 0);
61       const long length = ftell(file);
62       if (length) {
63         ::rewind(file);
64         std::vector<char> file_contents(length, '\0');
65         const size_t length_read =
66             ::fread(file_contents.data(), 1, file_contents.size(), file);
67         if (length_read > 0)
68           strm.Write(file_contents.data(), length_read);
69       }
70       ::fclose(file);
71     }
72   } else
73     strm.PutCString("NULL");
74 }
75 
76 PyObjectType PythonObject::GetObjectType() const {
77   if (!IsAllocated())
78     return PyObjectType::None;
79 
80   if (PythonModule::Check(m_py_obj))
81     return PyObjectType::Module;
82   if (PythonList::Check(m_py_obj))
83     return PyObjectType::List;
84   if (PythonTuple::Check(m_py_obj))
85     return PyObjectType::Tuple;
86   if (PythonDictionary::Check(m_py_obj))
87     return PyObjectType::Dictionary;
88   if (PythonString::Check(m_py_obj))
89     return PyObjectType::String;
90 #if PY_MAJOR_VERSION >= 3
91   if (PythonBytes::Check(m_py_obj))
92     return PyObjectType::Bytes;
93 #endif
94   if (PythonByteArray::Check(m_py_obj))
95     return PyObjectType::ByteArray;
96   if (PythonBoolean::Check(m_py_obj))
97     return PyObjectType::Boolean;
98   if (PythonInteger::Check(m_py_obj))
99     return PyObjectType::Integer;
100   if (PythonFile::Check(m_py_obj))
101     return PyObjectType::File;
102   if (PythonCallable::Check(m_py_obj))
103     return PyObjectType::Callable;
104   return PyObjectType::Unknown;
105 }
106 
107 PythonString PythonObject::Repr() const {
108   if (!m_py_obj)
109     return PythonString();
110   PyObject *repr = PyObject_Repr(m_py_obj);
111   if (!repr)
112     return PythonString();
113   return PythonString(PyRefType::Owned, repr);
114 }
115 
116 PythonString PythonObject::Str() const {
117   if (!m_py_obj)
118     return PythonString();
119   PyObject *str = PyObject_Str(m_py_obj);
120   if (!str)
121     return PythonString();
122   return PythonString(PyRefType::Owned, str);
123 }
124 
125 PythonObject
126 PythonObject::ResolveNameWithDictionary(llvm::StringRef name,
127                                         const PythonDictionary &dict) {
128   size_t dot_pos = name.find('.');
129   llvm::StringRef piece = name.substr(0, dot_pos);
130   PythonObject result = dict.GetItemForKey(PythonString(piece));
131   if (dot_pos == llvm::StringRef::npos) {
132     // There was no dot, we're done.
133     return result;
134   }
135 
136   // There was a dot.  The remaining portion of the name should be looked up in
137   // the context of the object that was found in the dictionary.
138   return result.ResolveName(name.substr(dot_pos + 1));
139 }
140 
141 PythonObject PythonObject::ResolveName(llvm::StringRef name) const {
142   // Resolve the name in the context of the specified object.  If, for example,
143   // `this` refers to a PyModule, then this will look for `name` in this
144   // module.  If `this` refers to a PyType, then it will resolve `name` as an
145   // attribute of that type.  If `this` refers to an instance of an object,
146   // then it will resolve `name` as the value of the specified field.
147   //
148   // This function handles dotted names so that, for example, if `m_py_obj`
149   // refers to the `sys` module, and `name` == "path.append", then it will find
150   // the function `sys.path.append`.
151 
152   size_t dot_pos = name.find('.');
153   if (dot_pos == llvm::StringRef::npos) {
154     // No dots in the name, we should be able to find the value immediately as
155     // an attribute of `m_py_obj`.
156     return GetAttributeValue(name);
157   }
158 
159   // Look up the first piece of the name, and resolve the rest as a child of
160   // that.
161   PythonObject parent = ResolveName(name.substr(0, dot_pos));
162   if (!parent.IsAllocated())
163     return PythonObject();
164 
165   // Tail recursion.. should be optimized by the compiler
166   return parent.ResolveName(name.substr(dot_pos + 1));
167 }
168 
169 bool PythonObject::HasAttribute(llvm::StringRef attr) const {
170   if (!IsValid())
171     return false;
172   PythonString py_attr(attr);
173   return !!PyObject_HasAttr(m_py_obj, py_attr.get());
174 }
175 
176 PythonObject PythonObject::GetAttributeValue(llvm::StringRef attr) const {
177   if (!IsValid())
178     return PythonObject();
179 
180   PythonString py_attr(attr);
181   if (!PyObject_HasAttr(m_py_obj, py_attr.get()))
182     return PythonObject();
183 
184   return PythonObject(PyRefType::Owned,
185                       PyObject_GetAttr(m_py_obj, py_attr.get()));
186 }
187 
188 StructuredData::ObjectSP PythonObject::CreateStructuredObject() const {
189   switch (GetObjectType()) {
190   case PyObjectType::Dictionary:
191     return PythonDictionary(PyRefType::Borrowed, m_py_obj)
192         .CreateStructuredDictionary();
193   case PyObjectType::Boolean:
194     return PythonBoolean(PyRefType::Borrowed, m_py_obj)
195         .CreateStructuredBoolean();
196   case PyObjectType::Integer:
197     return PythonInteger(PyRefType::Borrowed, m_py_obj)
198         .CreateStructuredInteger();
199   case PyObjectType::List:
200     return PythonList(PyRefType::Borrowed, m_py_obj).CreateStructuredArray();
201   case PyObjectType::String:
202     return PythonString(PyRefType::Borrowed, m_py_obj).CreateStructuredString();
203   case PyObjectType::Bytes:
204     return PythonBytes(PyRefType::Borrowed, m_py_obj).CreateStructuredString();
205   case PyObjectType::ByteArray:
206     return PythonByteArray(PyRefType::Borrowed, m_py_obj)
207         .CreateStructuredString();
208   case PyObjectType::None:
209     return StructuredData::ObjectSP();
210   default:
211     return StructuredData::ObjectSP(new StructuredPythonObject(m_py_obj));
212   }
213 }
214 
// PythonBytes
216 
217 PythonBytes::PythonBytes(llvm::ArrayRef<uint8_t> bytes) { SetBytes(bytes); }
218 
219 PythonBytes::PythonBytes(const uint8_t *bytes, size_t length) {
220   SetBytes(llvm::ArrayRef<uint8_t>(bytes, length));
221 }
222 
223 bool PythonBytes::Check(PyObject *py_obj) {
224   if (!py_obj)
225     return false;
226   return PyBytes_Check(py_obj);
227 }
228 
229 llvm::ArrayRef<uint8_t> PythonBytes::GetBytes() const {
230   if (!IsValid())
231     return llvm::ArrayRef<uint8_t>();
232 
233   Py_ssize_t size;
234   char *c;
235 
236   PyBytes_AsStringAndSize(m_py_obj, &c, &size);
237   return llvm::ArrayRef<uint8_t>(reinterpret_cast<uint8_t *>(c), size);
238 }
239 
240 size_t PythonBytes::GetSize() const {
241   if (!IsValid())
242     return 0;
243   return PyBytes_Size(m_py_obj);
244 }
245 
246 void PythonBytes::SetBytes(llvm::ArrayRef<uint8_t> bytes) {
247   const char *data = reinterpret_cast<const char *>(bytes.data());
248   PyObject *py_bytes = PyBytes_FromStringAndSize(data, bytes.size());
249   PythonObject::Reset(PyRefType::Owned, py_bytes);
250 }
251 
252 StructuredData::StringSP PythonBytes::CreateStructuredString() const {
253   StructuredData::StringSP result(new StructuredData::String);
254   Py_ssize_t size;
255   char *c;
256   PyBytes_AsStringAndSize(m_py_obj, &c, &size);
257   result->SetValue(std::string(c, size));
258   return result;
259 }
260 
261 PythonByteArray::PythonByteArray(llvm::ArrayRef<uint8_t> bytes)
262     : PythonByteArray(bytes.data(), bytes.size()) {}
263 
264 PythonByteArray::PythonByteArray(const uint8_t *bytes, size_t length) {
265   const char *str = reinterpret_cast<const char *>(bytes);
266   Reset(PyRefType::Owned, PyByteArray_FromStringAndSize(str, length));
267 }
268 
269 bool PythonByteArray::Check(PyObject *py_obj) {
270   if (!py_obj)
271     return false;
272   return PyByteArray_Check(py_obj);
273 }
274 
275 llvm::ArrayRef<uint8_t> PythonByteArray::GetBytes() const {
276   if (!IsValid())
277     return llvm::ArrayRef<uint8_t>();
278 
279   char *c = PyByteArray_AsString(m_py_obj);
280   size_t size = GetSize();
281   return llvm::ArrayRef<uint8_t>(reinterpret_cast<uint8_t *>(c), size);
282 }
283 
284 size_t PythonByteArray::GetSize() const {
285   if (!IsValid())
286     return 0;
287 
288   return PyByteArray_Size(m_py_obj);
289 }
290 
291 StructuredData::StringSP PythonByteArray::CreateStructuredString() const {
292   StructuredData::StringSP result(new StructuredData::String);
293   llvm::ArrayRef<uint8_t> bytes = GetBytes();
294   const char *str = reinterpret_cast<const char *>(bytes.data());
295   result->SetValue(std::string(str, bytes.size()));
296   return result;
297 }
298 
299 // PythonString
300 
301 Expected<PythonString> PythonString::FromUTF8(llvm::StringRef string) {
302 #if PY_MAJOR_VERSION >= 3
303   PyObject *str = PyUnicode_FromStringAndSize(string.data(), string.size());
304 #else
305   PyObject *str = PyString_FromStringAndSize(string.data(), string.size());
306 #endif
307   if (!str)
308     return llvm::make_error<PythonException>();
309   return Take<PythonString>(str);
310 }
311 
312 PythonString::PythonString(llvm::StringRef string) { SetString(string); }
313 
314 bool PythonString::Check(PyObject *py_obj) {
315   if (!py_obj)
316     return false;
317 
318   if (PyUnicode_Check(py_obj))
319     return true;
320 #if PY_MAJOR_VERSION < 3
321   if (PyString_Check(py_obj))
322     return true;
323 #endif
324   return false;
325 }
326 
327 void PythonString::Convert(PyRefType &type, PyObject *&py_obj) {
328 #if PY_MAJOR_VERSION < 3
329   // In Python 2, Don't store PyUnicode objects directly, because we need
330   // access to their underlying character buffers which Python 2 doesn't
331   // provide.
332   if (PyUnicode_Check(py_obj)) {
333     PyObject *s = PyUnicode_AsUTF8String(py_obj);
334     if (s == nullptr) {
335       PyErr_Clear();
336       if (type == PyRefType::Owned)
337         Py_DECREF(py_obj);
338       return;
339     }
340     if (type == PyRefType::Owned)
341       Py_DECREF(py_obj);
342     else
343       type = PyRefType::Owned;
344     py_obj = s;
345   }
346 #endif
347 }
348 
349 llvm::StringRef PythonString::GetString() const {
350   auto s = AsUTF8();
351   if (!s) {
352     llvm::consumeError(s.takeError());
353     return llvm::StringRef("");
354   }
355   return s.get();
356 }
357 
358 Expected<llvm::StringRef> PythonString::AsUTF8() const {
359   if (!IsValid())
360     return nullDeref();
361 
362   Py_ssize_t size;
363   const char *data;
364 
365 #if PY_MAJOR_VERSION >= 3
366   data = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
367 #else
368   char *c = NULL;
369   int r = PyString_AsStringAndSize(m_py_obj, &c, &size);
370   if (r < 0)
371     c = NULL;
372   data = c;
373 #endif
374 
375   if (!data)
376     return exception();
377 
378   return llvm::StringRef(data, size);
379 }
380 
381 size_t PythonString::GetSize() const {
382   if (IsValid()) {
383 #if PY_MAJOR_VERSION >= 3
384     return PyUnicode_GetSize(m_py_obj);
385 #else
386     return PyString_Size(m_py_obj);
387 #endif
388   }
389   return 0;
390 }
391 
392 void PythonString::SetString(llvm::StringRef string) {
393   auto s = FromUTF8(string);
394   if (!s) {
395     llvm::consumeError(s.takeError());
396     Reset();
397   } else {
398     PythonObject::Reset(std::move(s.get()));
399   }
400 }
401 
402 StructuredData::StringSP PythonString::CreateStructuredString() const {
403   StructuredData::StringSP result(new StructuredData::String);
404   result->SetValue(GetString());
405   return result;
406 }
407 
408 // PythonInteger
409 
410 PythonInteger::PythonInteger(int64_t value) { SetInteger(value); }
411 
412 bool PythonInteger::Check(PyObject *py_obj) {
413   if (!py_obj)
414     return false;
415 
416 #if PY_MAJOR_VERSION >= 3
417   // Python 3 does not have PyInt_Check.  There is only one type of integral
418   // value, long.
419   return PyLong_Check(py_obj);
420 #else
421   return PyLong_Check(py_obj) || PyInt_Check(py_obj);
422 #endif
423 }
424 
425 void PythonInteger::Convert(PyRefType &type, PyObject *&py_obj) {
426 #if PY_MAJOR_VERSION < 3
427   // Always store this as a PyLong, which makes interoperability between Python
428   // 2.x and Python 3.x easier.  This is only necessary in 2.x, since 3.x
429   // doesn't even have a PyInt.
430   if (PyInt_Check(py_obj)) {
431     // Since we converted the original object to a different type, the new
432     // object is an owned object regardless of the ownership semantics
433     // requested by the user.
434     long long value = PyInt_AsLong(py_obj);
435     PyObject *l = nullptr;
436     if (!PyErr_Occurred())
437       l = PyLong_FromLongLong(value);
438     if (l == nullptr) {
439       PyErr_Clear();
440       if (type == PyRefType::Owned)
441         Py_DECREF(py_obj);
442       return;
443     }
444     if (type == PyRefType::Owned)
445       Py_DECREF(py_obj);
446     else
447       type = PyRefType::Owned;
448     py_obj = l;
449   }
450 #endif
451 }
452 
453 int64_t PythonInteger::GetInteger() const {
454   if (m_py_obj) {
455     assert(PyLong_Check(m_py_obj) &&
456            "PythonInteger::GetInteger has a PyObject that isn't a PyLong");
457 
458     int overflow = 0;
459     int64_t result = PyLong_AsLongLongAndOverflow(m_py_obj, &overflow);
460     if (overflow != 0) {
461       // We got an integer that overflows, like 18446744072853913392L we can't
462       // use PyLong_AsLongLong() as it will return 0xffffffffffffffff. If we
463       // use the unsigned long long it will work as expected.
464       const uint64_t uval = PyLong_AsUnsignedLongLong(m_py_obj);
465       result = static_cast<int64_t>(uval);
466     }
467     return result;
468   }
469   return UINT64_MAX;
470 }
471 
472 void PythonInteger::SetInteger(int64_t value) {
473   PythonObject::Reset(PyRefType::Owned, PyLong_FromLongLong(value));
474 }
475 
476 StructuredData::IntegerSP PythonInteger::CreateStructuredInteger() const {
477   StructuredData::IntegerSP result(new StructuredData::Integer);
478   result->SetValue(GetInteger());
479   return result;
480 }
481 
482 // PythonBoolean
483 
484 PythonBoolean::PythonBoolean(bool value) {
485   SetValue(value);
486 }
487 
488 bool PythonBoolean::Check(PyObject *py_obj) {
489   return py_obj ? PyBool_Check(py_obj) : false;
490 }
491 
492 bool PythonBoolean::GetValue() const {
493   return m_py_obj ? PyObject_IsTrue(m_py_obj) : false;
494 }
495 
496 void PythonBoolean::SetValue(bool value) {
497   PythonObject::Reset(PyRefType::Owned, PyBool_FromLong(value));
498 }
499 
500 StructuredData::BooleanSP PythonBoolean::CreateStructuredBoolean() const {
501   StructuredData::BooleanSP result(new StructuredData::Boolean);
502   result->SetValue(GetValue());
503   return result;
504 }
505 
506 // PythonList
507 
508 PythonList::PythonList(PyInitialValue value) {
509   if (value == PyInitialValue::Empty)
510     Reset(PyRefType::Owned, PyList_New(0));
511 }
512 
513 PythonList::PythonList(int list_size) {
514   Reset(PyRefType::Owned, PyList_New(list_size));
515 }
516 
517 bool PythonList::Check(PyObject *py_obj) {
518   if (!py_obj)
519     return false;
520   return PyList_Check(py_obj);
521 }
522 
523 uint32_t PythonList::GetSize() const {
524   if (IsValid())
525     return PyList_GET_SIZE(m_py_obj);
526   return 0;
527 }
528 
529 PythonObject PythonList::GetItemAtIndex(uint32_t index) const {
530   if (IsValid())
531     return PythonObject(PyRefType::Borrowed, PyList_GetItem(m_py_obj, index));
532   return PythonObject();
533 }
534 
535 void PythonList::SetItemAtIndex(uint32_t index, const PythonObject &object) {
536   if (IsAllocated() && object.IsValid()) {
537     // PyList_SetItem is documented to "steal" a reference, so we need to
538     // convert it to an owned reference by incrementing it.
539     Py_INCREF(object.get());
540     PyList_SetItem(m_py_obj, index, object.get());
541   }
542 }
543 
544 void PythonList::AppendItem(const PythonObject &object) {
545   if (IsAllocated() && object.IsValid()) {
546     // `PyList_Append` does *not* steal a reference, so do not call `Py_INCREF`
547     // here like we do with `PyList_SetItem`.
548     PyList_Append(m_py_obj, object.get());
549   }
550 }
551 
552 StructuredData::ArraySP PythonList::CreateStructuredArray() const {
553   StructuredData::ArraySP result(new StructuredData::Array);
554   uint32_t count = GetSize();
555   for (uint32_t i = 0; i < count; ++i) {
556     PythonObject obj = GetItemAtIndex(i);
557     result->AddItem(obj.CreateStructuredObject());
558   }
559   return result;
560 }
561 
562 // PythonTuple
563 
564 PythonTuple::PythonTuple(PyInitialValue value) {
565   if (value == PyInitialValue::Empty)
566     Reset(PyRefType::Owned, PyTuple_New(0));
567 }
568 
569 PythonTuple::PythonTuple(int tuple_size) {
570   Reset(PyRefType::Owned, PyTuple_New(tuple_size));
571 }
572 
573 PythonTuple::PythonTuple(std::initializer_list<PythonObject> objects) {
574   m_py_obj = PyTuple_New(objects.size());
575 
576   uint32_t idx = 0;
577   for (auto object : objects) {
578     if (object.IsValid())
579       SetItemAtIndex(idx, object);
580     idx++;
581   }
582 }
583 
584 PythonTuple::PythonTuple(std::initializer_list<PyObject *> objects) {
585   m_py_obj = PyTuple_New(objects.size());
586 
587   uint32_t idx = 0;
588   for (auto py_object : objects) {
589     PythonObject object(PyRefType::Borrowed, py_object);
590     if (object.IsValid())
591       SetItemAtIndex(idx, object);
592     idx++;
593   }
594 }
595 
596 bool PythonTuple::Check(PyObject *py_obj) {
597   if (!py_obj)
598     return false;
599   return PyTuple_Check(py_obj);
600 }
601 
602 uint32_t PythonTuple::GetSize() const {
603   if (IsValid())
604     return PyTuple_GET_SIZE(m_py_obj);
605   return 0;
606 }
607 
608 PythonObject PythonTuple::GetItemAtIndex(uint32_t index) const {
609   if (IsValid())
610     return PythonObject(PyRefType::Borrowed, PyTuple_GetItem(m_py_obj, index));
611   return PythonObject();
612 }
613 
614 void PythonTuple::SetItemAtIndex(uint32_t index, const PythonObject &object) {
615   if (IsAllocated() && object.IsValid()) {
616     // PyTuple_SetItem is documented to "steal" a reference, so we need to
617     // convert it to an owned reference by incrementing it.
618     Py_INCREF(object.get());
619     PyTuple_SetItem(m_py_obj, index, object.get());
620   }
621 }
622 
623 StructuredData::ArraySP PythonTuple::CreateStructuredArray() const {
624   StructuredData::ArraySP result(new StructuredData::Array);
625   uint32_t count = GetSize();
626   for (uint32_t i = 0; i < count; ++i) {
627     PythonObject obj = GetItemAtIndex(i);
628     result->AddItem(obj.CreateStructuredObject());
629   }
630   return result;
631 }
632 
633 // PythonDictionary
634 
635 PythonDictionary::PythonDictionary(PyInitialValue value) {
636   if (value == PyInitialValue::Empty)
637     Reset(PyRefType::Owned, PyDict_New());
638 }
639 
640 bool PythonDictionary::Check(PyObject *py_obj) {
641   if (!py_obj)
642     return false;
643 
644   return PyDict_Check(py_obj);
645 }
646 
647 uint32_t PythonDictionary::GetSize() const {
648   if (IsValid())
649     return PyDict_Size(m_py_obj);
650   return 0;
651 }
652 
653 PythonList PythonDictionary::GetKeys() const {
654   if (IsValid())
655     return PythonList(PyRefType::Owned, PyDict_Keys(m_py_obj));
656   return PythonList(PyInitialValue::Invalid);
657 }
658 
659 PythonObject PythonDictionary::GetItemForKey(const PythonObject &key) const {
660   if (IsAllocated() && key.IsValid())
661     return PythonObject(PyRefType::Borrowed,
662                         PyDict_GetItem(m_py_obj, key.get()));
663   return PythonObject();
664 }
665 
666 void PythonDictionary::SetItemForKey(const PythonObject &key,
667                                      const PythonObject &value) {
668   if (IsAllocated() && key.IsValid() && value.IsValid())
669     PyDict_SetItem(m_py_obj, key.get(), value.get());
670 }
671 
672 StructuredData::DictionarySP
673 PythonDictionary::CreateStructuredDictionary() const {
674   StructuredData::DictionarySP result(new StructuredData::Dictionary);
675   PythonList keys(GetKeys());
676   uint32_t num_keys = keys.GetSize();
677   for (uint32_t i = 0; i < num_keys; ++i) {
678     PythonObject key = keys.GetItemAtIndex(i);
679     PythonObject value = GetItemForKey(key);
680     StructuredData::ObjectSP structured_value = value.CreateStructuredObject();
681     result->AddItem(key.Str().GetString(), structured_value);
682   }
683   return result;
684 }
685 
686 PythonModule PythonModule::BuiltinsModule() {
687 #if PY_MAJOR_VERSION >= 3
688   return AddModule("builtins");
689 #else
690   return AddModule("__builtin__");
691 #endif
692 }
693 
694 PythonModule PythonModule::MainModule() { return AddModule("__main__"); }
695 
696 PythonModule PythonModule::AddModule(llvm::StringRef module) {
697   std::string str = module.str();
698   return PythonModule(PyRefType::Borrowed, PyImport_AddModule(str.c_str()));
699 }
700 
701 Expected<PythonModule> PythonModule::Import(const char *name) {
702   PyObject *mod = PyImport_ImportModule(name);
703   if (!mod)
704     return exception();
705   return Take<PythonModule>(mod);
706 }
707 
708 Expected<PythonObject> PythonModule::Get(const char *name) {
709   if (!IsValid())
710     return nullDeref();
711   PyObject *dict = PyModule_GetDict(m_py_obj);
712   if (!dict)
713     return exception();
714   PyObject *item = PyDict_GetItemString(dict, name);
715   if (!item)
716     return exception();
717   return Retain<PythonObject>(item);
718 }
719 
720 bool PythonModule::Check(PyObject *py_obj) {
721   if (!py_obj)
722     return false;
723 
724   return PyModule_Check(py_obj);
725 }
726 
727 PythonDictionary PythonModule::GetDictionary() const {
728   return PythonDictionary(PyRefType::Borrowed, PyModule_GetDict(m_py_obj));
729 }
730 
731 bool PythonCallable::Check(PyObject *py_obj) {
732   if (!py_obj)
733     return false;
734 
735   return PyCallable_Check(py_obj);
736 }
737 
738 PythonCallable::ArgInfo PythonCallable::GetNumInitArguments() const {
739   ArgInfo result = {0, false, false, false};
740   if (!IsValid())
741     return result;
742 
743   PythonObject __init__ = GetAttributeValue("__init__");
744   if (__init__.IsValid() ) {
745     auto __init_callable__ = __init__.AsType<PythonCallable>();
746     if (__init_callable__.IsValid())
747       return __init_callable__.GetNumArguments();
748   }
749   return result;
750 }
751 
752 PythonCallable::ArgInfo PythonCallable::GetNumArguments() const {
753   ArgInfo result = {0, false, false, false};
754   if (!IsValid())
755     return result;
756 
757   PyObject *py_func_obj = m_py_obj;
758   if (PyMethod_Check(py_func_obj)) {
759     py_func_obj = PyMethod_GET_FUNCTION(py_func_obj);
760     PythonObject im_self = GetAttributeValue("im_self");
761     if (im_self.IsValid() && !im_self.IsNone())
762       result.is_bound_method = true;
763   } else {
764     // see if this is a callable object with an __call__ method
765     if (!PyFunction_Check(py_func_obj)) {
766       PythonObject __call__ = GetAttributeValue("__call__");
767       if (__call__.IsValid()) {
768         auto __callable__ = __call__.AsType<PythonCallable>();
769         if (__callable__.IsValid()) {
770           py_func_obj = PyMethod_GET_FUNCTION(__callable__.get());
771           PythonObject im_self = GetAttributeValue("im_self");
772           if (im_self.IsValid() && !im_self.IsNone())
773             result.is_bound_method = true;
774         }
775       }
776     }
777   }
778 
779   if (!py_func_obj)
780     return result;
781 
782   PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(py_func_obj);
783   if (!code)
784     return result;
785 
786   result.count = code->co_argcount;
787   result.has_varargs = !!(code->co_flags & CO_VARARGS);
788   result.has_kwargs = !!(code->co_flags & CO_VARKEYWORDS);
789   return result;
790 }
791 
792 PythonObject PythonCallable::operator()() {
793   return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, nullptr));
794 }
795 
796 PythonObject PythonCallable::
797 operator()(std::initializer_list<PyObject *> args) {
798   PythonTuple arg_tuple(args);
799   return PythonObject(PyRefType::Owned,
800                       PyObject_CallObject(m_py_obj, arg_tuple.get()));
801 }
802 
803 PythonObject PythonCallable::
804 operator()(std::initializer_list<PythonObject> args) {
805   PythonTuple arg_tuple(args);
806   return PythonObject(PyRefType::Owned,
807                       PyObject_CallObject(m_py_obj, arg_tuple.get()));
808 }
809 
810 bool PythonFile::Check(PyObject *py_obj) {
811   if (!py_obj)
812     return false;
813 #if PY_MAJOR_VERSION < 3
814   return PyFile_Check(py_obj);
815 #else
816   // In Python 3, there is no `PyFile_Check`, and in fact PyFile is not even a
817   // first-class object type anymore.  `PyFile_FromFd` is just a thin wrapper
818   // over `io.open()`, which returns some object derived from `io.IOBase`. As a
819   // result, the only way to detect a file in Python 3 is to check whether it
820   // inherits from `io.IOBase`.
821   auto io_module = PythonModule::Import("io");
822   if (!io_module) {
823     llvm::consumeError(io_module.takeError());
824     return false;
825   }
826   auto iobase = io_module.get().Get("IOBase");
827   if (!iobase) {
828     llvm::consumeError(iobase.takeError());
829     return false;
830   }
831   int r = PyObject_IsInstance(py_obj, iobase.get().get());
832   if (r < 0) {
833     llvm::consumeError(exception()); // clear the exception and log it.
834     return false;
835   }
836   return !!r;
837 #endif
838 }
839 
840 FileUP PythonFile::GetUnderlyingFile() const {
841   if (!IsValid())
842     return nullptr;
843 
844   // We don't own the file descriptor returned by this function, make sure the
845   // File object knows about that.
846   PythonString py_mode = GetAttributeValue("mode").AsType<PythonString>();
847   auto options = File::GetOptionsFromMode(py_mode.GetString());
848   if (!options) {
849     llvm::consumeError(options.takeError());
850     return nullptr;
851   }
852   auto file = std::unique_ptr<File>(new NativeFile(
853       PyObject_AsFileDescriptor(m_py_obj), options.get(), false));
854   if (!file->IsValid())
855     return nullptr;
856   return file;
857 }
858 
859 namespace {
860 class GIL {
861 public:
862   GIL() {
863     m_state = PyGILState_Ensure();
864     assert(!PyErr_Occurred());
865   }
866   ~GIL() { PyGILState_Release(m_state); }
867 
868 protected:
869   PyGILState_STATE m_state;
870 };
871 } // namespace
872 
873 const char *PythonException::toCString() const {
874   if (!m_repr_bytes)
875     return "unknown exception";
876   return PyBytes_AS_STRING(m_repr_bytes);
877 }
878 
879 PythonException::PythonException(const char *caller) {
880   assert(PyErr_Occurred());
881   m_exception_type = m_exception = m_traceback = m_repr_bytes = NULL;
882   PyErr_Fetch(&m_exception_type, &m_exception, &m_traceback);
883   PyErr_NormalizeException(&m_exception_type, &m_exception, &m_traceback);
884   PyErr_Clear();
885   if (m_exception) {
886     PyObject *repr = PyObject_Repr(m_exception);
887     if (repr) {
888       m_repr_bytes = PyUnicode_AsEncodedString(repr, "utf-8", nullptr);
889       if (!m_repr_bytes) {
890         PyErr_Clear();
891       }
892       Py_XDECREF(repr);
893     } else {
894       PyErr_Clear();
895     }
896   }
897   Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_SCRIPT);
898   if (caller)
899     LLDB_LOGF(log, "%s failed with exception: %s", caller, toCString());
900   else
901     LLDB_LOGF(log, "python exception: %s", toCString());
902 }
903 void PythonException::Restore() {
904   if (m_exception_type && m_exception) {
905     PyErr_Restore(m_exception_type, m_exception, m_traceback);
906   } else {
907     PyErr_SetString(PyExc_Exception, toCString());
908   }
909   m_exception_type = m_exception = m_traceback = NULL;
910 }
911 
912 PythonException::~PythonException() {
913   Py_XDECREF(m_exception_type);
914   Py_XDECREF(m_exception);
915   Py_XDECREF(m_traceback);
916   Py_XDECREF(m_repr_bytes);
917 }
918 
919 void PythonException::log(llvm::raw_ostream &OS) const { OS << toCString(); }
920 
921 std::error_code PythonException::convertToErrorCode() const {
922   return llvm::inconvertibleErrorCode();
923 }
924 
925 char PythonException::ID = 0;
926 
927 llvm::Expected<File::OpenOptions>
928 GetOptionsForPyObject(const PythonObject &obj) {
929 #if PY_MAJOR_VERSION >= 3
930   auto options = File::OpenOptions(0);
931   auto readable = As<bool>(obj.CallMethod("readable"));
932   if (!readable)
933     return readable.takeError();
934   auto writable = As<bool>(obj.CallMethod("writable"));
935   if (!writable)
936     return writable.takeError();
937   if (readable.get())
938     options |= File::eOpenOptionRead;
939   if (writable.get())
940     options |= File::eOpenOptionWrite;
941   return options;
942 #else
943   PythonString py_mode = obj.GetAttributeValue("mode").AsType<PythonString>();
944   return File::GetOptionsFromMode(py_mode.GetString());
945 #endif
946 }
947 
948 // Base class template for python files.   All it knows how to do
949 // is hold a reference to the python object and close or flush it
950 // when the File is closed.
951 namespace {
952 template <typename Base> class OwnedPythonFile : public Base {
953 public:
954   template <typename... Args>
955   OwnedPythonFile(const PythonFile &file, bool borrowed, Args... args)
956       : Base(args...), m_py_obj(file), m_borrowed(borrowed) {
957     assert(m_py_obj);
958   }
959 
960   ~OwnedPythonFile() override {
961     assert(m_py_obj);
962     GIL takeGIL;
963     Close();
964     m_py_obj.Reset();
965   }
966 
967   bool IsPythonSideValid() const {
968     GIL takeGIL;
969     auto closed = As<bool>(m_py_obj.GetAttribute("closed"));
970     if (!closed) {
971       llvm::consumeError(closed.takeError());
972       return false;
973     }
974     return !closed.get();
975   }
976 
977   bool IsValid() const override {
978     return IsPythonSideValid() && Base::IsValid();
979   }
980 
981   Status Close() override {
982     assert(m_py_obj);
983     Status py_error, base_error;
984     GIL takeGIL;
985     if (!m_borrowed) {
986       auto r = m_py_obj.CallMethod("close");
987       if (!r)
988         py_error = Status(r.takeError());
989     }
990     base_error = Base::Close();
991     if (py_error.Fail())
992       return py_error;
993     return base_error;
994   };
995 
996   PyObject *GetPythonObject() const {
997     assert(m_py_obj.IsValid());
998     return m_py_obj.get();
999   }
1000 
1001   static bool classof(const File *file) = delete;
1002 
1003 protected:
1004   PythonFile m_py_obj;
1005   bool m_borrowed;
1006 };
1007 } // namespace
1008 
1009 // A SimplePythonFile is a OwnedPythonFile that just does all I/O as
1010 // a NativeFile
1011 namespace {
1012 class SimplePythonFile : public OwnedPythonFile<NativeFile> {
1013 public:
1014   SimplePythonFile(const PythonFile &file, bool borrowed, int fd,
1015                    File::OpenOptions options)
1016       : OwnedPythonFile(file, borrowed, fd, options, false) {}
1017 
1018   static char ID;
1019   bool isA(const void *classID) const override {
1020     return classID == &ID || NativeFile::isA(classID);
1021   }
1022   static bool classof(const File *file) { return file->isA(&ID); }
1023 };
1024 char SimplePythonFile::ID = 0;
1025 } // namespace
1026 
1027 #if PY_MAJOR_VERSION >= 3
1028 
1029 namespace {
1030 class PythonBuffer {
1031 public:
1032   PythonBuffer &operator=(const PythonBuffer &) = delete;
1033   PythonBuffer(const PythonBuffer &) = delete;
1034 
1035   static Expected<PythonBuffer> Create(PythonObject &obj,
1036                                        int flags = PyBUF_SIMPLE) {
1037     Py_buffer py_buffer = {};
1038     PyObject_GetBuffer(obj.get(), &py_buffer, flags);
1039     if (!py_buffer.obj)
1040       return llvm::make_error<PythonException>();
1041     return PythonBuffer(py_buffer);
1042   }
1043 
1044   PythonBuffer(PythonBuffer &&other) {
1045     m_buffer = other.m_buffer;
1046     other.m_buffer.obj = nullptr;
1047   }
1048 
1049   ~PythonBuffer() {
1050     if (m_buffer.obj)
1051       PyBuffer_Release(&m_buffer);
1052   }
1053 
1054   Py_buffer &get() { return m_buffer; }
1055 
1056 private:
1057   // takes ownership of the buffer.
1058   PythonBuffer(const Py_buffer &py_buffer) : m_buffer(py_buffer) {}
1059   Py_buffer m_buffer;
1060 };
1061 } // namespace
1062 
1063 // Shared methods between TextPythonFile and BinaryPythonFile
1064 namespace {
1065 class PythonIOFile : public OwnedPythonFile<File> {
1066 public:
1067   PythonIOFile(const PythonFile &file, bool borrowed)
1068       : OwnedPythonFile(file, borrowed) {}
1069 
1070   ~PythonIOFile() override { Close(); }
1071 
1072   bool IsValid() const override { return IsPythonSideValid(); }
1073 
1074   Status Close() override {
1075     assert(m_py_obj);
1076     GIL takeGIL;
1077     if (m_borrowed)
1078       return Flush();
1079     auto r = m_py_obj.CallMethod("close");
1080     if (!r)
1081       return Status(r.takeError());
1082     return Status();
1083   }
1084 
1085   Status Flush() override {
1086     GIL takeGIL;
1087     auto r = m_py_obj.CallMethod("flush");
1088     if (!r)
1089       return Status(r.takeError());
1090     return Status();
1091   }
1092 
1093   Expected<File::OpenOptions> GetOptions() const override {
1094     GIL takeGIL;
1095     return GetOptionsForPyObject(m_py_obj);
1096   }
1097 
1098   static char ID;
1099   bool isA(const void *classID) const override {
1100     return classID == &ID || File::isA(classID);
1101   }
1102   static bool classof(const File *file) { return file->isA(&ID); }
1103 };
1104 char PythonIOFile::ID = 0;
1105 } // namespace
1106 
1107 namespace {
1108 class BinaryPythonFile : public PythonIOFile {
1109 protected:
1110   int m_descriptor;
1111 
1112 public:
1113   BinaryPythonFile(int fd, const PythonFile &file, bool borrowed)
1114       : PythonIOFile(file, borrowed),
1115         m_descriptor(File::DescriptorIsValid(fd) ? fd
1116                                                  : File::kInvalidDescriptor) {}
1117 
1118   int GetDescriptor() const override { return m_descriptor; }
1119 
1120   Status Write(const void *buf, size_t &num_bytes) override {
1121     GIL takeGIL;
1122     PyObject *pybuffer_p = PyMemoryView_FromMemory(
1123         const_cast<char *>((const char *)buf), num_bytes, PyBUF_READ);
1124     if (!pybuffer_p)
1125       return Status(llvm::make_error<PythonException>());
1126     auto pybuffer = Take<PythonObject>(pybuffer_p);
1127     num_bytes = 0;
1128     auto bytes_written = As<long long>(m_py_obj.CallMethod("write", pybuffer));
1129     if (!bytes_written)
1130       return Status(bytes_written.takeError());
1131     if (bytes_written.get() < 0)
1132       return Status(".write() method returned a negative number!");
1133     static_assert(sizeof(long long) >= sizeof(size_t), "overflow");
1134     num_bytes = bytes_written.get();
1135     return Status();
1136   }
1137 
1138   Status Read(void *buf, size_t &num_bytes) override {
1139     GIL takeGIL;
1140     static_assert(sizeof(long long) >= sizeof(size_t), "overflow");
1141     auto pybuffer_obj =
1142         m_py_obj.CallMethod("read", (unsigned long long)num_bytes);
1143     if (!pybuffer_obj)
1144       return Status(pybuffer_obj.takeError());
1145     num_bytes = 0;
1146     if (pybuffer_obj.get().IsNone()) {
1147       // EOF
1148       num_bytes = 0;
1149       return Status();
1150     }
1151     auto pybuffer = PythonBuffer::Create(pybuffer_obj.get());
1152     if (!pybuffer)
1153       return Status(pybuffer.takeError());
1154     memcpy(buf, pybuffer.get().get().buf, pybuffer.get().get().len);
1155     num_bytes = pybuffer.get().get().len;
1156     return Status();
1157   }
1158 };
1159 } // namespace
1160 
1161 namespace {
1162 class TextPythonFile : public PythonIOFile {
1163 protected:
1164   int m_descriptor;
1165 
1166 public:
1167   TextPythonFile(int fd, const PythonFile &file, bool borrowed)
1168       : PythonIOFile(file, borrowed),
1169         m_descriptor(File::DescriptorIsValid(fd) ? fd
1170                                                  : File::kInvalidDescriptor) {}
1171 
1172   int GetDescriptor() const override { return m_descriptor; }
1173 
1174   Status Write(const void *buf, size_t &num_bytes) override {
1175     GIL takeGIL;
1176     auto pystring =
1177         PythonString::FromUTF8(llvm::StringRef((const char *)buf, num_bytes));
1178     if (!pystring)
1179       return Status(pystring.takeError());
1180     num_bytes = 0;
1181     auto bytes_written =
1182         As<long long>(m_py_obj.CallMethod("write", pystring.get()));
1183     if (!bytes_written)
1184       return Status(bytes_written.takeError());
1185     if (bytes_written.get() < 0)
1186       return Status(".write() method returned a negative number!");
1187     static_assert(sizeof(long long) >= sizeof(size_t), "overflow");
1188     num_bytes = bytes_written.get();
1189     return Status();
1190   }
1191 
1192   Status Read(void *buf, size_t &num_bytes) override {
1193     GIL takeGIL;
1194     size_t num_chars = num_bytes / 6;
1195     size_t orig_num_bytes = num_bytes;
1196     num_bytes = 0;
1197     if (orig_num_bytes < 6) {
1198       return Status("can't read less than 6 bytes from a utf8 text stream");
1199     }
1200     auto pystring = As<PythonString>(
1201         m_py_obj.CallMethod("read", (unsigned long long)num_chars));
1202     if (!pystring)
1203       return Status(pystring.takeError());
1204     if (pystring.get().IsNone()) {
1205       // EOF
1206       return Status();
1207     }
1208     auto stringref = pystring.get().AsUTF8();
1209     if (!stringref)
1210       return Status(stringref.takeError());
1211     num_bytes = stringref.get().size();
1212     memcpy(buf, stringref.get().begin(), num_bytes);
1213     return Status();
1214   }
1215 };
1216 } // namespace
1217 
1218 #endif
1219 
1220 llvm::Expected<FileSP> PythonFile::ConvertToFile(bool borrowed) {
1221   if (!IsValid())
1222     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1223                                    "invalid PythonFile");
1224 
1225   int fd = PyObject_AsFileDescriptor(m_py_obj);
1226   if (fd < 0) {
1227     PyErr_Clear();
1228     return ConvertToFileForcingUseOfScriptingIOMethods(borrowed);
1229   }
1230   auto options = GetOptionsForPyObject(*this);
1231   if (!options)
1232     return options.takeError();
1233 
1234   // LLDB and python will not share I/O buffers.  We should probably
1235   // flush the python buffers now.
1236   auto r = CallMethod("flush");
1237   if (!r)
1238     return r.takeError();
1239 
1240   FileSP file_sp;
1241   if (borrowed) {
1242     // In this case we we don't need to retain the python
1243     // object at all.
1244     file_sp = std::make_shared<NativeFile>(fd, options.get(), false);
1245   } else {
1246     file_sp = std::static_pointer_cast<File>(
1247         std::make_shared<SimplePythonFile>(*this, borrowed, fd, options.get()));
1248   }
1249   if (!file_sp->IsValid())
1250     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1251                                    "invalid File");
1252 
1253   return file_sp;
1254 }
1255 
1256 llvm::Expected<FileSP>
1257 PythonFile::ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed) {
1258 
1259   assert(!PyErr_Occurred());
1260 
1261   if (!IsValid())
1262     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1263                                    "invalid PythonFile");
1264 
1265 #if PY_MAJOR_VERSION < 3
1266 
1267   return llvm::createStringError(llvm::inconvertibleErrorCode(),
1268                                  "not supported on python 2");
1269 
1270 #else
1271 
1272   int fd = PyObject_AsFileDescriptor(m_py_obj);
1273   if (fd < 0) {
1274     PyErr_Clear();
1275     fd = File::kInvalidDescriptor;
1276   }
1277 
1278   auto io_module = PythonModule::Import("io");
1279   if (!io_module)
1280     return io_module.takeError();
1281   auto textIOBase = io_module.get().Get("TextIOBase");
1282   if (!textIOBase)
1283     return textIOBase.takeError();
1284   auto rawIOBase = io_module.get().Get("RawIOBase");
1285   if (!rawIOBase)
1286     return rawIOBase.takeError();
1287   auto bufferedIOBase = io_module.get().Get("BufferedIOBase");
1288   if (!bufferedIOBase)
1289     return bufferedIOBase.takeError();
1290 
1291   FileSP file_sp;
1292 
1293   auto isTextIO = IsInstance(textIOBase.get());
1294   if (!isTextIO)
1295     return isTextIO.takeError();
1296   if (isTextIO.get())
1297     file_sp = std::static_pointer_cast<File>(
1298         std::make_shared<TextPythonFile>(fd, *this, borrowed));
1299 
1300   auto isRawIO = IsInstance(rawIOBase.get());
1301   if (!isRawIO)
1302     return isRawIO.takeError();
1303   auto isBufferedIO = IsInstance(bufferedIOBase.get());
1304   if (!isBufferedIO)
1305     return isBufferedIO.takeError();
1306 
1307   if (isRawIO.get() || isBufferedIO.get()) {
1308     file_sp = std::static_pointer_cast<File>(
1309         std::make_shared<BinaryPythonFile>(fd, *this, borrowed));
1310   }
1311 
1312   if (!file_sp)
1313     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1314                                    "python file is neither text nor binary");
1315 
1316   if (!file_sp->IsValid())
1317     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1318                                    "invalid File");
1319 
1320   return file_sp;
1321 
1322 #endif
1323 }
1324 
1325 Expected<PythonFile> PythonFile::FromFile(File &file, const char *mode) {
1326   if (!file.IsValid())
1327     return llvm::createStringError(llvm::inconvertibleErrorCode(),
1328                                    "invalid file");
1329 
1330   if (auto *simple = llvm::dyn_cast<SimplePythonFile>(&file))
1331     return Retain<PythonFile>(simple->GetPythonObject());
1332 #if PY_MAJOR_VERSION >= 3
1333   if (auto *pythonio = llvm::dyn_cast<PythonIOFile>(&file))
1334     return Retain<PythonFile>(pythonio->GetPythonObject());
1335 #endif
1336 
1337   if (!mode) {
1338     auto m = file.GetOpenMode();
1339     if (!m)
1340       return m.takeError();
1341     mode = m.get();
1342   }
1343 
1344   PyObject *file_obj;
1345 #if PY_MAJOR_VERSION >= 3
1346   file_obj = PyFile_FromFd(file.GetDescriptor(), nullptr, mode, -1, nullptr,
1347                            "ignore", nullptr, 0);
1348 #else
1349   // Read through the Python source, doesn't seem to modify these strings
1350   char *cmode = const_cast<char *>(mode);
1351   // We pass ::flush instead of ::fclose here so we borrow the FILE* --
1352   // the lldb_private::File still owns it.
1353   file_obj =
1354       PyFile_FromFile(file.GetStream(), const_cast<char *>(""), cmode, ::fflush);
1355 #endif
1356 
1357   if (!file_obj)
1358     return exception();
1359 
1360   return Take<PythonFile>(file_obj);
1361 }
1362 
1363 #endif
1364