xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/JSON.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/Support/JSON.h"
10e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h"
1106c3fb27SDimitry Andric #include "llvm/ADT/StringExtras.h"
120b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h"
13e8d8bef9SDimitry Andric #include "llvm/Support/Error.h"
140b57cec5SDimitry Andric #include "llvm/Support/Format.h"
1504eeddc0SDimitry Andric #include "llvm/Support/NativeFormatting.h"
1606c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h"
170b57cec5SDimitry Andric #include <cctype>
1806c3fb27SDimitry Andric #include <cerrno>
19bdd1243dSDimitry Andric #include <optional>
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric namespace llvm {
220b57cec5SDimitry Andric namespace json {
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric Value &Object::operator[](const ObjectKey &K) {
250b57cec5SDimitry Andric   return try_emplace(K, nullptr).first->getSecond();
260b57cec5SDimitry Andric }
270b57cec5SDimitry Andric Value &Object::operator[](ObjectKey &&K) {
280b57cec5SDimitry Andric   return try_emplace(std::move(K), nullptr).first->getSecond();
290b57cec5SDimitry Andric }
300b57cec5SDimitry Andric Value *Object::get(StringRef K) {
310b57cec5SDimitry Andric   auto I = find(K);
320b57cec5SDimitry Andric   if (I == end())
330b57cec5SDimitry Andric     return nullptr;
340b57cec5SDimitry Andric   return &I->second;
350b57cec5SDimitry Andric }
360b57cec5SDimitry Andric const Value *Object::get(StringRef K) const {
370b57cec5SDimitry Andric   auto I = find(K);
380b57cec5SDimitry Andric   if (I == end())
390b57cec5SDimitry Andric     return nullptr;
400b57cec5SDimitry Andric   return &I->second;
410b57cec5SDimitry Andric }
42bdd1243dSDimitry Andric std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
430b57cec5SDimitry Andric   if (auto *V = get(K))
440b57cec5SDimitry Andric     return V->getAsNull();
45bdd1243dSDimitry Andric   return std::nullopt;
460b57cec5SDimitry Andric }
47bdd1243dSDimitry Andric std::optional<bool> Object::getBoolean(StringRef K) const {
480b57cec5SDimitry Andric   if (auto *V = get(K))
490b57cec5SDimitry Andric     return V->getAsBoolean();
50bdd1243dSDimitry Andric   return std::nullopt;
510b57cec5SDimitry Andric }
52bdd1243dSDimitry Andric std::optional<double> Object::getNumber(StringRef K) const {
530b57cec5SDimitry Andric   if (auto *V = get(K))
540b57cec5SDimitry Andric     return V->getAsNumber();
55bdd1243dSDimitry Andric   return std::nullopt;
560b57cec5SDimitry Andric }
57bdd1243dSDimitry Andric std::optional<int64_t> Object::getInteger(StringRef K) const {
580b57cec5SDimitry Andric   if (auto *V = get(K))
590b57cec5SDimitry Andric     return V->getAsInteger();
60bdd1243dSDimitry Andric   return std::nullopt;
610b57cec5SDimitry Andric }
62bdd1243dSDimitry Andric std::optional<llvm::StringRef> Object::getString(StringRef K) const {
630b57cec5SDimitry Andric   if (auto *V = get(K))
640b57cec5SDimitry Andric     return V->getAsString();
65bdd1243dSDimitry Andric   return std::nullopt;
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric const json::Object *Object::getObject(StringRef K) const {
680b57cec5SDimitry Andric   if (auto *V = get(K))
690b57cec5SDimitry Andric     return V->getAsObject();
700b57cec5SDimitry Andric   return nullptr;
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric json::Object *Object::getObject(StringRef K) {
730b57cec5SDimitry Andric   if (auto *V = get(K))
740b57cec5SDimitry Andric     return V->getAsObject();
750b57cec5SDimitry Andric   return nullptr;
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric const json::Array *Object::getArray(StringRef K) const {
780b57cec5SDimitry Andric   if (auto *V = get(K))
790b57cec5SDimitry Andric     return V->getAsArray();
800b57cec5SDimitry Andric   return nullptr;
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric json::Array *Object::getArray(StringRef K) {
830b57cec5SDimitry Andric   if (auto *V = get(K))
840b57cec5SDimitry Andric     return V->getAsArray();
850b57cec5SDimitry Andric   return nullptr;
860b57cec5SDimitry Andric }
870b57cec5SDimitry Andric bool operator==(const Object &LHS, const Object &RHS) {
880b57cec5SDimitry Andric   if (LHS.size() != RHS.size())
890b57cec5SDimitry Andric     return false;
900b57cec5SDimitry Andric   for (const auto &L : LHS) {
910b57cec5SDimitry Andric     auto R = RHS.find(L.first);
920b57cec5SDimitry Andric     if (R == RHS.end() || L.second != R->second)
930b57cec5SDimitry Andric       return false;
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric   return true;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric Array::Array(std::initializer_list<Value> Elements) {
990b57cec5SDimitry Andric   V.reserve(Elements.size());
1000b57cec5SDimitry Andric   for (const Value &V : Elements) {
1010b57cec5SDimitry Andric     emplace_back(nullptr);
1020b57cec5SDimitry Andric     back().moveFrom(std::move(V));
1030b57cec5SDimitry Andric   }
1040b57cec5SDimitry Andric }
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric Value::Value(std::initializer_list<Value> Elements)
1070b57cec5SDimitry Andric     : Value(json::Array(Elements)) {}
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric void Value::copyFrom(const Value &M) {
1100b57cec5SDimitry Andric   Type = M.Type;
1110b57cec5SDimitry Andric   switch (Type) {
1120b57cec5SDimitry Andric   case T_Null:
1130b57cec5SDimitry Andric   case T_Boolean:
1140b57cec5SDimitry Andric   case T_Double:
1150b57cec5SDimitry Andric   case T_Integer:
116349cc55cSDimitry Andric   case T_UINT64:
117e8d8bef9SDimitry Andric     memcpy(&Union, &M.Union, sizeof(Union));
1180b57cec5SDimitry Andric     break;
1190b57cec5SDimitry Andric   case T_StringRef:
1200b57cec5SDimitry Andric     create<StringRef>(M.as<StringRef>());
1210b57cec5SDimitry Andric     break;
1220b57cec5SDimitry Andric   case T_String:
1230b57cec5SDimitry Andric     create<std::string>(M.as<std::string>());
1240b57cec5SDimitry Andric     break;
1250b57cec5SDimitry Andric   case T_Object:
1260b57cec5SDimitry Andric     create<json::Object>(M.as<json::Object>());
1270b57cec5SDimitry Andric     break;
1280b57cec5SDimitry Andric   case T_Array:
1290b57cec5SDimitry Andric     create<json::Array>(M.as<json::Array>());
1300b57cec5SDimitry Andric     break;
1310b57cec5SDimitry Andric   }
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric void Value::moveFrom(const Value &&M) {
1350b57cec5SDimitry Andric   Type = M.Type;
1360b57cec5SDimitry Andric   switch (Type) {
1370b57cec5SDimitry Andric   case T_Null:
1380b57cec5SDimitry Andric   case T_Boolean:
1390b57cec5SDimitry Andric   case T_Double:
1400b57cec5SDimitry Andric   case T_Integer:
141349cc55cSDimitry Andric   case T_UINT64:
142e8d8bef9SDimitry Andric     memcpy(&Union, &M.Union, sizeof(Union));
1430b57cec5SDimitry Andric     break;
1440b57cec5SDimitry Andric   case T_StringRef:
1450b57cec5SDimitry Andric     create<StringRef>(M.as<StringRef>());
1460b57cec5SDimitry Andric     break;
1470b57cec5SDimitry Andric   case T_String:
1480b57cec5SDimitry Andric     create<std::string>(std::move(M.as<std::string>()));
1490b57cec5SDimitry Andric     M.Type = T_Null;
1500b57cec5SDimitry Andric     break;
1510b57cec5SDimitry Andric   case T_Object:
1520b57cec5SDimitry Andric     create<json::Object>(std::move(M.as<json::Object>()));
1530b57cec5SDimitry Andric     M.Type = T_Null;
1540b57cec5SDimitry Andric     break;
1550b57cec5SDimitry Andric   case T_Array:
1560b57cec5SDimitry Andric     create<json::Array>(std::move(M.as<json::Array>()));
1570b57cec5SDimitry Andric     M.Type = T_Null;
1580b57cec5SDimitry Andric     break;
1590b57cec5SDimitry Andric   }
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric void Value::destroy() {
1630b57cec5SDimitry Andric   switch (Type) {
1640b57cec5SDimitry Andric   case T_Null:
1650b57cec5SDimitry Andric   case T_Boolean:
1660b57cec5SDimitry Andric   case T_Double:
1670b57cec5SDimitry Andric   case T_Integer:
168349cc55cSDimitry Andric   case T_UINT64:
1690b57cec5SDimitry Andric     break;
1700b57cec5SDimitry Andric   case T_StringRef:
1710b57cec5SDimitry Andric     as<StringRef>().~StringRef();
1720b57cec5SDimitry Andric     break;
1730b57cec5SDimitry Andric   case T_String:
1740b57cec5SDimitry Andric     as<std::string>().~basic_string();
1750b57cec5SDimitry Andric     break;
1760b57cec5SDimitry Andric   case T_Object:
1770b57cec5SDimitry Andric     as<json::Object>().~Object();
1780b57cec5SDimitry Andric     break;
1790b57cec5SDimitry Andric   case T_Array:
1800b57cec5SDimitry Andric     as<json::Array>().~Array();
1810b57cec5SDimitry Andric     break;
1820b57cec5SDimitry Andric   }
1830b57cec5SDimitry Andric }
1840b57cec5SDimitry Andric 
1850b57cec5SDimitry Andric bool operator==(const Value &L, const Value &R) {
1860b57cec5SDimitry Andric   if (L.kind() != R.kind())
1870b57cec5SDimitry Andric     return false;
1880b57cec5SDimitry Andric   switch (L.kind()) {
1890b57cec5SDimitry Andric   case Value::Null:
1900b57cec5SDimitry Andric     return *L.getAsNull() == *R.getAsNull();
1910b57cec5SDimitry Andric   case Value::Boolean:
1920b57cec5SDimitry Andric     return *L.getAsBoolean() == *R.getAsBoolean();
1930b57cec5SDimitry Andric   case Value::Number:
1940b57cec5SDimitry Andric     // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
1950b57cec5SDimitry Andric     // The same integer must convert to the same double, per the standard.
1960b57cec5SDimitry Andric     // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
1970b57cec5SDimitry Andric     // So we avoid floating point promotion for exact comparisons.
1980b57cec5SDimitry Andric     if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
1990b57cec5SDimitry Andric       return L.getAsInteger() == R.getAsInteger();
2000b57cec5SDimitry Andric     return *L.getAsNumber() == *R.getAsNumber();
2010b57cec5SDimitry Andric   case Value::String:
2020b57cec5SDimitry Andric     return *L.getAsString() == *R.getAsString();
2030b57cec5SDimitry Andric   case Value::Array:
2040b57cec5SDimitry Andric     return *L.getAsArray() == *R.getAsArray();
2050b57cec5SDimitry Andric   case Value::Object:
2060b57cec5SDimitry Andric     return *L.getAsObject() == *R.getAsObject();
2070b57cec5SDimitry Andric   }
2080b57cec5SDimitry Andric   llvm_unreachable("Unknown value kind");
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric 
211e8d8bef9SDimitry Andric void Path::report(llvm::StringLiteral Msg) {
212e8d8bef9SDimitry Andric   // Walk up to the root context, and count the number of segments.
213e8d8bef9SDimitry Andric   unsigned Count = 0;
214e8d8bef9SDimitry Andric   const Path *P;
215e8d8bef9SDimitry Andric   for (P = this; P->Parent != nullptr; P = P->Parent)
216e8d8bef9SDimitry Andric     ++Count;
217e8d8bef9SDimitry Andric   Path::Root *R = P->Seg.root();
218e8d8bef9SDimitry Andric   // Fill in the error message and copy the path (in reverse order).
219e8d8bef9SDimitry Andric   R->ErrorMessage = Msg;
220e8d8bef9SDimitry Andric   R->ErrorPath.resize(Count);
221e8d8bef9SDimitry Andric   auto It = R->ErrorPath.begin();
222e8d8bef9SDimitry Andric   for (P = this; P->Parent != nullptr; P = P->Parent)
223e8d8bef9SDimitry Andric     *It++ = P->Seg;
224e8d8bef9SDimitry Andric }
225e8d8bef9SDimitry Andric 
226e8d8bef9SDimitry Andric Error Path::Root::getError() const {
227e8d8bef9SDimitry Andric   std::string S;
228e8d8bef9SDimitry Andric   raw_string_ostream OS(S);
229e8d8bef9SDimitry Andric   OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
230e8d8bef9SDimitry Andric   if (ErrorPath.empty()) {
231e8d8bef9SDimitry Andric     if (!Name.empty())
232e8d8bef9SDimitry Andric       OS << " when parsing " << Name;
233e8d8bef9SDimitry Andric   } else {
234e8d8bef9SDimitry Andric     OS << " at " << (Name.empty() ? "(root)" : Name);
235e8d8bef9SDimitry Andric     for (const Path::Segment &S : llvm::reverse(ErrorPath)) {
236e8d8bef9SDimitry Andric       if (S.isField())
237e8d8bef9SDimitry Andric         OS << '.' << S.field();
238e8d8bef9SDimitry Andric       else
239e8d8bef9SDimitry Andric         OS << '[' << S.index() << ']';
240e8d8bef9SDimitry Andric     }
241e8d8bef9SDimitry Andric   }
242*0fca6ea1SDimitry Andric   return createStringError(llvm::inconvertibleErrorCode(), S);
243e8d8bef9SDimitry Andric }
244e8d8bef9SDimitry Andric 
245e8d8bef9SDimitry Andric std::vector<const Object::value_type *> sortedElements(const Object &O) {
246e8d8bef9SDimitry Andric   std::vector<const Object::value_type *> Elements;
247e8d8bef9SDimitry Andric   for (const auto &E : O)
248e8d8bef9SDimitry Andric     Elements.push_back(&E);
249e8d8bef9SDimitry Andric   llvm::sort(Elements,
250e8d8bef9SDimitry Andric              [](const Object::value_type *L, const Object::value_type *R) {
251e8d8bef9SDimitry Andric                return L->first < R->first;
252e8d8bef9SDimitry Andric              });
253e8d8bef9SDimitry Andric   return Elements;
254e8d8bef9SDimitry Andric }
255e8d8bef9SDimitry Andric 
256e8d8bef9SDimitry Andric // Prints a one-line version of a value that isn't our main focus.
257e8d8bef9SDimitry Andric // We interleave writes to OS and JOS, exploiting the lack of extra buffering.
258e8d8bef9SDimitry Andric // This is OK as we own the implementation.
259*0fca6ea1SDimitry Andric static void abbreviate(const Value &V, OStream &JOS) {
260e8d8bef9SDimitry Andric   switch (V.kind()) {
261e8d8bef9SDimitry Andric   case Value::Array:
262e8d8bef9SDimitry Andric     JOS.rawValue(V.getAsArray()->empty() ? "[]" : "[ ... ]");
263e8d8bef9SDimitry Andric     break;
264e8d8bef9SDimitry Andric   case Value::Object:
265e8d8bef9SDimitry Andric     JOS.rawValue(V.getAsObject()->empty() ? "{}" : "{ ... }");
266e8d8bef9SDimitry Andric     break;
267e8d8bef9SDimitry Andric   case Value::String: {
268e8d8bef9SDimitry Andric     llvm::StringRef S = *V.getAsString();
269e8d8bef9SDimitry Andric     if (S.size() < 40) {
270e8d8bef9SDimitry Andric       JOS.value(V);
271e8d8bef9SDimitry Andric     } else {
272e8d8bef9SDimitry Andric       std::string Truncated = fixUTF8(S.take_front(37));
273e8d8bef9SDimitry Andric       Truncated.append("...");
274e8d8bef9SDimitry Andric       JOS.value(Truncated);
275e8d8bef9SDimitry Andric     }
276e8d8bef9SDimitry Andric     break;
277e8d8bef9SDimitry Andric   }
278e8d8bef9SDimitry Andric   default:
279e8d8bef9SDimitry Andric     JOS.value(V);
280e8d8bef9SDimitry Andric   }
281e8d8bef9SDimitry Andric }
282e8d8bef9SDimitry Andric 
283e8d8bef9SDimitry Andric // Prints a semi-expanded version of a value that is our main focus.
284e8d8bef9SDimitry Andric // Array/Object entries are printed, but not recursively as they may be huge.
285*0fca6ea1SDimitry Andric static void abbreviateChildren(const Value &V, OStream &JOS) {
286e8d8bef9SDimitry Andric   switch (V.kind()) {
287e8d8bef9SDimitry Andric   case Value::Array:
288e8d8bef9SDimitry Andric     JOS.array([&] {
289e8d8bef9SDimitry Andric       for (const auto &I : *V.getAsArray())
290e8d8bef9SDimitry Andric         abbreviate(I, JOS);
291e8d8bef9SDimitry Andric     });
292e8d8bef9SDimitry Andric     break;
293e8d8bef9SDimitry Andric   case Value::Object:
294e8d8bef9SDimitry Andric     JOS.object([&] {
295e8d8bef9SDimitry Andric       for (const auto *KV : sortedElements(*V.getAsObject())) {
296e8d8bef9SDimitry Andric         JOS.attributeBegin(KV->first);
297e8d8bef9SDimitry Andric         abbreviate(KV->second, JOS);
298e8d8bef9SDimitry Andric         JOS.attributeEnd();
299e8d8bef9SDimitry Andric       }
300e8d8bef9SDimitry Andric     });
301e8d8bef9SDimitry Andric     break;
302e8d8bef9SDimitry Andric   default:
303e8d8bef9SDimitry Andric     JOS.value(V);
304e8d8bef9SDimitry Andric   }
305e8d8bef9SDimitry Andric }
306e8d8bef9SDimitry Andric 
307e8d8bef9SDimitry Andric void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
308e8d8bef9SDimitry Andric   OStream JOS(OS, /*IndentSize=*/2);
309e8d8bef9SDimitry Andric   // PrintValue recurses down the path, printing the ancestors of our target.
310e8d8bef9SDimitry Andric   // Siblings of nodes along the path are printed with abbreviate(), and the
311e8d8bef9SDimitry Andric   // target itself is printed with the somewhat richer abbreviateChildren().
312e8d8bef9SDimitry Andric   // 'Recurse' is the lambda itself, to allow recursive calls.
313e8d8bef9SDimitry Andric   auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
314e8d8bef9SDimitry Andric     // Print the target node itself, with the error as a comment.
315e8d8bef9SDimitry Andric     // Also used if we can't follow our path, e.g. it names a field that
316e8d8bef9SDimitry Andric     // *should* exist but doesn't.
317e8d8bef9SDimitry Andric     auto HighlightCurrent = [&] {
318e8d8bef9SDimitry Andric       std::string Comment = "error: ";
319e8d8bef9SDimitry Andric       Comment.append(ErrorMessage.data(), ErrorMessage.size());
320e8d8bef9SDimitry Andric       JOS.comment(Comment);
321e8d8bef9SDimitry Andric       abbreviateChildren(V, JOS);
322e8d8bef9SDimitry Andric     };
323e8d8bef9SDimitry Andric     if (Path.empty()) // We reached our target.
324e8d8bef9SDimitry Andric       return HighlightCurrent();
325e8d8bef9SDimitry Andric     const Segment &S = Path.back(); // Path is in reverse order.
326e8d8bef9SDimitry Andric     if (S.isField()) {
327e8d8bef9SDimitry Andric       // Current node is an object, path names a field.
328e8d8bef9SDimitry Andric       llvm::StringRef FieldName = S.field();
329e8d8bef9SDimitry Andric       const Object *O = V.getAsObject();
330e8d8bef9SDimitry Andric       if (!O || !O->get(FieldName))
331e8d8bef9SDimitry Andric         return HighlightCurrent();
332e8d8bef9SDimitry Andric       JOS.object([&] {
333e8d8bef9SDimitry Andric         for (const auto *KV : sortedElements(*O)) {
334e8d8bef9SDimitry Andric           JOS.attributeBegin(KV->first);
335*0fca6ea1SDimitry Andric           if (FieldName == StringRef(KV->first))
336e8d8bef9SDimitry Andric             Recurse(KV->second, Path.drop_back(), Recurse);
337e8d8bef9SDimitry Andric           else
338e8d8bef9SDimitry Andric             abbreviate(KV->second, JOS);
339e8d8bef9SDimitry Andric           JOS.attributeEnd();
340e8d8bef9SDimitry Andric         }
341e8d8bef9SDimitry Andric       });
342e8d8bef9SDimitry Andric     } else {
343e8d8bef9SDimitry Andric       // Current node is an array, path names an element.
344e8d8bef9SDimitry Andric       const Array *A = V.getAsArray();
345e8d8bef9SDimitry Andric       if (!A || S.index() >= A->size())
346e8d8bef9SDimitry Andric         return HighlightCurrent();
347e8d8bef9SDimitry Andric       JOS.array([&] {
348e8d8bef9SDimitry Andric         unsigned Current = 0;
349e8d8bef9SDimitry Andric         for (const auto &V : *A) {
350e8d8bef9SDimitry Andric           if (Current++ == S.index())
351e8d8bef9SDimitry Andric             Recurse(V, Path.drop_back(), Recurse);
352e8d8bef9SDimitry Andric           else
353e8d8bef9SDimitry Andric             abbreviate(V, JOS);
354e8d8bef9SDimitry Andric         }
355e8d8bef9SDimitry Andric       });
356e8d8bef9SDimitry Andric     }
357e8d8bef9SDimitry Andric   };
358e8d8bef9SDimitry Andric   PrintValue(R, ErrorPath, PrintValue);
359e8d8bef9SDimitry Andric }
360e8d8bef9SDimitry Andric 
3610b57cec5SDimitry Andric namespace {
3620b57cec5SDimitry Andric // Simple recursive-descent JSON parser.
3630b57cec5SDimitry Andric class Parser {
3640b57cec5SDimitry Andric public:
3650b57cec5SDimitry Andric   Parser(StringRef JSON)
3660b57cec5SDimitry Andric       : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
3670b57cec5SDimitry Andric 
3680b57cec5SDimitry Andric   bool checkUTF8() {
3690b57cec5SDimitry Andric     size_t ErrOffset;
3700b57cec5SDimitry Andric     if (isUTF8(StringRef(Start, End - Start), &ErrOffset))
3710b57cec5SDimitry Andric       return true;
3720b57cec5SDimitry Andric     P = Start + ErrOffset; // For line/column calculation.
3730b57cec5SDimitry Andric     return parseError("Invalid UTF-8 sequence");
3740b57cec5SDimitry Andric   }
3750b57cec5SDimitry Andric 
3760b57cec5SDimitry Andric   bool parseValue(Value &Out);
3770b57cec5SDimitry Andric 
3780b57cec5SDimitry Andric   bool assertEnd() {
3790b57cec5SDimitry Andric     eatWhitespace();
3800b57cec5SDimitry Andric     if (P == End)
3810b57cec5SDimitry Andric       return true;
3820b57cec5SDimitry Andric     return parseError("Text after end of document");
3830b57cec5SDimitry Andric   }
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric   Error takeError() {
3860b57cec5SDimitry Andric     assert(Err);
3870b57cec5SDimitry Andric     return std::move(*Err);
3880b57cec5SDimitry Andric   }
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric private:
3910b57cec5SDimitry Andric   void eatWhitespace() {
3920b57cec5SDimitry Andric     while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t'))
3930b57cec5SDimitry Andric       ++P;
3940b57cec5SDimitry Andric   }
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   // On invalid syntax, parseX() functions return false and set Err.
3970b57cec5SDimitry Andric   bool parseNumber(char First, Value &Out);
3980b57cec5SDimitry Andric   bool parseString(std::string &Out);
3990b57cec5SDimitry Andric   bool parseUnicode(std::string &Out);
4000b57cec5SDimitry Andric   bool parseError(const char *Msg); // always returns false
4010b57cec5SDimitry Andric 
4020b57cec5SDimitry Andric   char next() { return P == End ? 0 : *P++; }
4030b57cec5SDimitry Andric   char peek() { return P == End ? 0 : *P; }
4040b57cec5SDimitry Andric   static bool isNumber(char C) {
4050b57cec5SDimitry Andric     return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' ||
4060b57cec5SDimitry Andric            C == '5' || C == '6' || C == '7' || C == '8' || C == '9' ||
4070b57cec5SDimitry Andric            C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.';
4080b57cec5SDimitry Andric   }
4090b57cec5SDimitry Andric 
410bdd1243dSDimitry Andric   std::optional<Error> Err;
4110b57cec5SDimitry Andric   const char *Start, *P, *End;
4120b57cec5SDimitry Andric };
413*0fca6ea1SDimitry Andric } // namespace
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric bool Parser::parseValue(Value &Out) {
4160b57cec5SDimitry Andric   eatWhitespace();
4170b57cec5SDimitry Andric   if (P == End)
4180b57cec5SDimitry Andric     return parseError("Unexpected EOF");
4190b57cec5SDimitry Andric   switch (char C = next()) {
4200b57cec5SDimitry Andric   // Bare null/true/false are easy - first char identifies them.
4210b57cec5SDimitry Andric   case 'n':
4220b57cec5SDimitry Andric     Out = nullptr;
4230b57cec5SDimitry Andric     return (next() == 'u' && next() == 'l' && next() == 'l') ||
4240b57cec5SDimitry Andric            parseError("Invalid JSON value (null?)");
4250b57cec5SDimitry Andric   case 't':
4260b57cec5SDimitry Andric     Out = true;
4270b57cec5SDimitry Andric     return (next() == 'r' && next() == 'u' && next() == 'e') ||
4280b57cec5SDimitry Andric            parseError("Invalid JSON value (true?)");
4290b57cec5SDimitry Andric   case 'f':
4300b57cec5SDimitry Andric     Out = false;
4310b57cec5SDimitry Andric     return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') ||
4320b57cec5SDimitry Andric            parseError("Invalid JSON value (false?)");
4330b57cec5SDimitry Andric   case '"': {
4340b57cec5SDimitry Andric     std::string S;
4350b57cec5SDimitry Andric     if (parseString(S)) {
4360b57cec5SDimitry Andric       Out = std::move(S);
4370b57cec5SDimitry Andric       return true;
4380b57cec5SDimitry Andric     }
4390b57cec5SDimitry Andric     return false;
4400b57cec5SDimitry Andric   }
4410b57cec5SDimitry Andric   case '[': {
4420b57cec5SDimitry Andric     Out = Array{};
4430b57cec5SDimitry Andric     Array &A = *Out.getAsArray();
4440b57cec5SDimitry Andric     eatWhitespace();
4450b57cec5SDimitry Andric     if (peek() == ']') {
4460b57cec5SDimitry Andric       ++P;
4470b57cec5SDimitry Andric       return true;
4480b57cec5SDimitry Andric     }
4490b57cec5SDimitry Andric     for (;;) {
4500b57cec5SDimitry Andric       A.emplace_back(nullptr);
4510b57cec5SDimitry Andric       if (!parseValue(A.back()))
4520b57cec5SDimitry Andric         return false;
4530b57cec5SDimitry Andric       eatWhitespace();
4540b57cec5SDimitry Andric       switch (next()) {
4550b57cec5SDimitry Andric       case ',':
4560b57cec5SDimitry Andric         eatWhitespace();
4570b57cec5SDimitry Andric         continue;
4580b57cec5SDimitry Andric       case ']':
4590b57cec5SDimitry Andric         return true;
4600b57cec5SDimitry Andric       default:
4610b57cec5SDimitry Andric         return parseError("Expected , or ] after array element");
4620b57cec5SDimitry Andric       }
4630b57cec5SDimitry Andric     }
4640b57cec5SDimitry Andric   }
4650b57cec5SDimitry Andric   case '{': {
4660b57cec5SDimitry Andric     Out = Object{};
4670b57cec5SDimitry Andric     Object &O = *Out.getAsObject();
4680b57cec5SDimitry Andric     eatWhitespace();
4690b57cec5SDimitry Andric     if (peek() == '}') {
4700b57cec5SDimitry Andric       ++P;
4710b57cec5SDimitry Andric       return true;
4720b57cec5SDimitry Andric     }
4730b57cec5SDimitry Andric     for (;;) {
4740b57cec5SDimitry Andric       if (next() != '"')
4750b57cec5SDimitry Andric         return parseError("Expected object key");
4760b57cec5SDimitry Andric       std::string K;
4770b57cec5SDimitry Andric       if (!parseString(K))
4780b57cec5SDimitry Andric         return false;
4790b57cec5SDimitry Andric       eatWhitespace();
4800b57cec5SDimitry Andric       if (next() != ':')
4810b57cec5SDimitry Andric         return parseError("Expected : after object key");
4820b57cec5SDimitry Andric       eatWhitespace();
4830b57cec5SDimitry Andric       if (!parseValue(O[std::move(K)]))
4840b57cec5SDimitry Andric         return false;
4850b57cec5SDimitry Andric       eatWhitespace();
4860b57cec5SDimitry Andric       switch (next()) {
4870b57cec5SDimitry Andric       case ',':
4880b57cec5SDimitry Andric         eatWhitespace();
4890b57cec5SDimitry Andric         continue;
4900b57cec5SDimitry Andric       case '}':
4910b57cec5SDimitry Andric         return true;
4920b57cec5SDimitry Andric       default:
4930b57cec5SDimitry Andric         return parseError("Expected , or } after object property");
4940b57cec5SDimitry Andric       }
4950b57cec5SDimitry Andric     }
4960b57cec5SDimitry Andric   }
4970b57cec5SDimitry Andric   default:
4980b57cec5SDimitry Andric     if (isNumber(C))
4990b57cec5SDimitry Andric       return parseNumber(C, Out);
5000b57cec5SDimitry Andric     return parseError("Invalid JSON value");
5010b57cec5SDimitry Andric   }
5020b57cec5SDimitry Andric }
5030b57cec5SDimitry Andric 
5040b57cec5SDimitry Andric bool Parser::parseNumber(char First, Value &Out) {
5050b57cec5SDimitry Andric   // Read the number into a string. (Must be null-terminated for strto*).
5060b57cec5SDimitry Andric   SmallString<24> S;
5070b57cec5SDimitry Andric   S.push_back(First);
5080b57cec5SDimitry Andric   while (isNumber(peek()))
5090b57cec5SDimitry Andric     S.push_back(next());
5100b57cec5SDimitry Andric   char *End;
5110b57cec5SDimitry Andric   // Try first to parse as integer, and if so preserve full 64 bits.
51281ad6265SDimitry Andric   // We check for errno for out of bounds errors and for End == S.end()
51381ad6265SDimitry Andric   // to make sure that the numeric string is not malformed.
51481ad6265SDimitry Andric   errno = 0;
51581ad6265SDimitry Andric   int64_t I = std::strtoll(S.c_str(), &End, 10);
51681ad6265SDimitry Andric   if (End == S.end() && errno != ERANGE) {
5170b57cec5SDimitry Andric     Out = int64_t(I);
5180b57cec5SDimitry Andric     return true;
5190b57cec5SDimitry Andric   }
52081ad6265SDimitry Andric   // strtroull has a special handling for negative numbers, but in this
52181ad6265SDimitry Andric   // case we don't want to do that because negative numbers were already
52281ad6265SDimitry Andric   // handled in the previous block.
52381ad6265SDimitry Andric   if (First != '-') {
52481ad6265SDimitry Andric     errno = 0;
52581ad6265SDimitry Andric     uint64_t UI = std::strtoull(S.c_str(), &End, 10);
52681ad6265SDimitry Andric     if (End == S.end() && errno != ERANGE) {
52781ad6265SDimitry Andric       Out = UI;
52881ad6265SDimitry Andric       return true;
52981ad6265SDimitry Andric     }
53081ad6265SDimitry Andric   }
5310b57cec5SDimitry Andric   // If it's not an integer
5320b57cec5SDimitry Andric   Out = std::strtod(S.c_str(), &End);
5330b57cec5SDimitry Andric   return End == S.end() || parseError("Invalid JSON value (number?)");
5340b57cec5SDimitry Andric }
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric bool Parser::parseString(std::string &Out) {
5370b57cec5SDimitry Andric   // leading quote was already consumed.
5380b57cec5SDimitry Andric   for (char C = next(); C != '"'; C = next()) {
5390b57cec5SDimitry Andric     if (LLVM_UNLIKELY(P == End))
5400b57cec5SDimitry Andric       return parseError("Unterminated string");
5410b57cec5SDimitry Andric     if (LLVM_UNLIKELY((C & 0x1f) == C))
5420b57cec5SDimitry Andric       return parseError("Control character in string");
5430b57cec5SDimitry Andric     if (LLVM_LIKELY(C != '\\')) {
5440b57cec5SDimitry Andric       Out.push_back(C);
5450b57cec5SDimitry Andric       continue;
5460b57cec5SDimitry Andric     }
5470b57cec5SDimitry Andric     // Handle escape sequence.
5480b57cec5SDimitry Andric     switch (C = next()) {
5490b57cec5SDimitry Andric     case '"':
5500b57cec5SDimitry Andric     case '\\':
5510b57cec5SDimitry Andric     case '/':
5520b57cec5SDimitry Andric       Out.push_back(C);
5530b57cec5SDimitry Andric       break;
5540b57cec5SDimitry Andric     case 'b':
5550b57cec5SDimitry Andric       Out.push_back('\b');
5560b57cec5SDimitry Andric       break;
5570b57cec5SDimitry Andric     case 'f':
5580b57cec5SDimitry Andric       Out.push_back('\f');
5590b57cec5SDimitry Andric       break;
5600b57cec5SDimitry Andric     case 'n':
5610b57cec5SDimitry Andric       Out.push_back('\n');
5620b57cec5SDimitry Andric       break;
5630b57cec5SDimitry Andric     case 'r':
5640b57cec5SDimitry Andric       Out.push_back('\r');
5650b57cec5SDimitry Andric       break;
5660b57cec5SDimitry Andric     case 't':
5670b57cec5SDimitry Andric       Out.push_back('\t');
5680b57cec5SDimitry Andric       break;
5690b57cec5SDimitry Andric     case 'u':
5700b57cec5SDimitry Andric       if (!parseUnicode(Out))
5710b57cec5SDimitry Andric         return false;
5720b57cec5SDimitry Andric       break;
5730b57cec5SDimitry Andric     default:
5740b57cec5SDimitry Andric       return parseError("Invalid escape sequence");
5750b57cec5SDimitry Andric     }
5760b57cec5SDimitry Andric   }
5770b57cec5SDimitry Andric   return true;
5780b57cec5SDimitry Andric }
5790b57cec5SDimitry Andric 
// Appends the UTF-8 encoding of the code point Rune to Out, using one to
// four bytes depending on its magnitude. Rune must be below 0x110000.
static void encodeUtf8(uint32_t Rune, std::string &Out) {
  // Emits a continuation byte (10xxxxxx) carrying the six bits of Rune that
  // start at bit position Shift.
  auto Continuation = [&](unsigned Shift) {
    Out.push_back(static_cast<char>(0x80 | ((Rune >> Shift) & 0x3F)));
  };

  if (Rune < 0x80) {
    // 0xxxxxxx
    Out.push_back(static_cast<char>(Rune));
    return;
  }
  if (Rune < 0x800) {
    // 110xxxxx 10xxxxxx
    Out.push_back(static_cast<char>(0xC0 | (Rune >> 6)));
    Continuation(0);
    return;
  }
  if (Rune < 0x10000) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    Out.push_back(static_cast<char>(0xE0 | (Rune >> 12)));
    Continuation(6);
    Continuation(0);
    return;
  }
  if (Rune < 0x110000) {
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    Out.push_back(static_cast<char>(0xF0 | (Rune >> 18)));
    Continuation(12);
    Continuation(6);
    Continuation(0);
    return;
  }
  llvm_unreachable("Invalid codepoint");
}
6080b57cec5SDimitry Andric 
6090b57cec5SDimitry Andric // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
6100b57cec5SDimitry Andric // May parse several sequential escapes to ensure proper surrogate handling.
6110b57cec5SDimitry Andric // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
6120b57cec5SDimitry Andric // These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
6130b57cec5SDimitry Andric bool Parser::parseUnicode(std::string &Out) {
6140b57cec5SDimitry Andric   // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
6150b57cec5SDimitry Andric   auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
6160b57cec5SDimitry Andric   // Decodes 4 hex digits from the stream into Out, returns false on error.
6170b57cec5SDimitry Andric   auto Parse4Hex = [this](uint16_t &Out) -> bool {
6180b57cec5SDimitry Andric     Out = 0;
6190b57cec5SDimitry Andric     char Bytes[] = {next(), next(), next(), next()};
6200b57cec5SDimitry Andric     for (unsigned char C : Bytes) {
6210b57cec5SDimitry Andric       if (!std::isxdigit(C))
6220b57cec5SDimitry Andric         return parseError("Invalid \\u escape sequence");
6230b57cec5SDimitry Andric       Out <<= 4;
6240b57cec5SDimitry Andric       Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
6250b57cec5SDimitry Andric     }
6260b57cec5SDimitry Andric     return true;
6270b57cec5SDimitry Andric   };
6280b57cec5SDimitry Andric   uint16_t First; // UTF-16 code unit from the first \u escape.
6290b57cec5SDimitry Andric   if (!Parse4Hex(First))
6300b57cec5SDimitry Andric     return false;
6310b57cec5SDimitry Andric 
6320b57cec5SDimitry Andric   // We loop to allow proper surrogate-pair error handling.
6330b57cec5SDimitry Andric   while (true) {
6340b57cec5SDimitry Andric     // Case 1: the UTF-16 code unit is already a codepoint in the BMP.
6350b57cec5SDimitry Andric     if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) {
6360b57cec5SDimitry Andric       encodeUtf8(First, Out);
6370b57cec5SDimitry Andric       return true;
6380b57cec5SDimitry Andric     }
6390b57cec5SDimitry Andric 
6400b57cec5SDimitry Andric     // Case 2: it's an (unpaired) trailing surrogate.
6410b57cec5SDimitry Andric     if (LLVM_UNLIKELY(First >= 0xDC00)) {
6420b57cec5SDimitry Andric       Invalid();
6430b57cec5SDimitry Andric       return true;
6440b57cec5SDimitry Andric     }
6450b57cec5SDimitry Andric 
6460b57cec5SDimitry Andric     // Case 3: it's a leading surrogate. We expect a trailing one next.
6470b57cec5SDimitry Andric     // Case 3a: there's no trailing \u escape. Don't advance in the stream.
6480b57cec5SDimitry Andric     if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) {
6490b57cec5SDimitry Andric       Invalid(); // Leading surrogate was unpaired.
6500b57cec5SDimitry Andric       return true;
6510b57cec5SDimitry Andric     }
6520b57cec5SDimitry Andric     P += 2;
6530b57cec5SDimitry Andric     uint16_t Second;
6540b57cec5SDimitry Andric     if (!Parse4Hex(Second))
6550b57cec5SDimitry Andric       return false;
6560b57cec5SDimitry Andric     // Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
6570b57cec5SDimitry Andric     if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) {
6580b57cec5SDimitry Andric       Invalid();      // Leading surrogate was unpaired.
6590b57cec5SDimitry Andric       First = Second; // Second escape still needs to be processed.
6600b57cec5SDimitry Andric       continue;
6610b57cec5SDimitry Andric     }
6620b57cec5SDimitry Andric     // Case 3c: a valid surrogate pair encoding an astral codepoint.
6630b57cec5SDimitry Andric     encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out);
6640b57cec5SDimitry Andric     return true;
6650b57cec5SDimitry Andric   }
6660b57cec5SDimitry Andric }
6670b57cec5SDimitry Andric 
6680b57cec5SDimitry Andric bool Parser::parseError(const char *Msg) {
6690b57cec5SDimitry Andric   int Line = 1;
6700b57cec5SDimitry Andric   const char *StartOfLine = Start;
6710b57cec5SDimitry Andric   for (const char *X = Start; X < P; ++X) {
6720b57cec5SDimitry Andric     if (*X == 0x0A) {
6730b57cec5SDimitry Andric       ++Line;
6740b57cec5SDimitry Andric       StartOfLine = X + 1;
6750b57cec5SDimitry Andric     }
6760b57cec5SDimitry Andric   }
6770b57cec5SDimitry Andric   Err.emplace(
6788bcb0991SDimitry Andric       std::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
6790b57cec5SDimitry Andric   return false;
6800b57cec5SDimitry Andric }
6810b57cec5SDimitry Andric 
6820b57cec5SDimitry Andric Expected<Value> parse(StringRef JSON) {
6830b57cec5SDimitry Andric   Parser P(JSON);
6840b57cec5SDimitry Andric   Value E = nullptr;
6850b57cec5SDimitry Andric   if (P.checkUTF8())
6860b57cec5SDimitry Andric     if (P.parseValue(E))
6870b57cec5SDimitry Andric       if (P.assertEnd())
6880b57cec5SDimitry Andric         return std::move(E);
6890b57cec5SDimitry Andric   return P.takeError();
6900b57cec5SDimitry Andric }
6910b57cec5SDimitry Andric char ParseError::ID = 0;
6920b57cec5SDimitry Andric 
6930b57cec5SDimitry Andric bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
6940b57cec5SDimitry Andric   // Fast-path for ASCII, which is valid UTF-8.
6950b57cec5SDimitry Andric   if (LLVM_LIKELY(isASCII(S)))
6960b57cec5SDimitry Andric     return true;
6970b57cec5SDimitry Andric 
6980b57cec5SDimitry Andric   const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data;
6990b57cec5SDimitry Andric   if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
7000b57cec5SDimitry Andric     return true;
7010b57cec5SDimitry Andric 
7020b57cec5SDimitry Andric   if (ErrOffset)
7030b57cec5SDimitry Andric     *ErrOffset = Rest - Data;
7040b57cec5SDimitry Andric   return false;
7050b57cec5SDimitry Andric }
7060b57cec5SDimitry Andric 
7070b57cec5SDimitry Andric std::string fixUTF8(llvm::StringRef S) {
7080b57cec5SDimitry Andric   // This isn't particularly efficient, but is only for error-recovery.
7090b57cec5SDimitry Andric   std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
7100b57cec5SDimitry Andric   const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data());
7110b57cec5SDimitry Andric   UTF32 *Out32 = Codepoints.data();
7120b57cec5SDimitry Andric   ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(),
7130b57cec5SDimitry Andric                      lenientConversion);
7140b57cec5SDimitry Andric   Codepoints.resize(Out32 - Codepoints.data());
7150b57cec5SDimitry Andric   std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice
7160b57cec5SDimitry Andric   const UTF32 *In32 = Codepoints.data();
7170b57cec5SDimitry Andric   UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]);
7180b57cec5SDimitry Andric   ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(),
7190b57cec5SDimitry Andric                      strictConversion);
7200b57cec5SDimitry Andric   Res.resize(reinterpret_cast<char *>(Out8) - Res.data());
7210b57cec5SDimitry Andric   return Res;
7220b57cec5SDimitry Andric }
7230b57cec5SDimitry Andric 
7240b57cec5SDimitry Andric static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
7250b57cec5SDimitry Andric   OS << '\"';
7260b57cec5SDimitry Andric   for (unsigned char C : S) {
7270b57cec5SDimitry Andric     if (C == 0x22 || C == 0x5C)
7280b57cec5SDimitry Andric       OS << '\\';
7290b57cec5SDimitry Andric     if (C >= 0x20) {
7300b57cec5SDimitry Andric       OS << C;
7310b57cec5SDimitry Andric       continue;
7320b57cec5SDimitry Andric     }
7330b57cec5SDimitry Andric     OS << '\\';
7340b57cec5SDimitry Andric     switch (C) {
7350b57cec5SDimitry Andric     // A few characters are common enough to make short escapes worthwhile.
7360b57cec5SDimitry Andric     case '\t':
7370b57cec5SDimitry Andric       OS << 't';
7380b57cec5SDimitry Andric       break;
7390b57cec5SDimitry Andric     case '\n':
7400b57cec5SDimitry Andric       OS << 'n';
7410b57cec5SDimitry Andric       break;
7420b57cec5SDimitry Andric     case '\r':
7430b57cec5SDimitry Andric       OS << 'r';
7440b57cec5SDimitry Andric       break;
7450b57cec5SDimitry Andric     default:
7460b57cec5SDimitry Andric       OS << 'u';
7470b57cec5SDimitry Andric       llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4);
7480b57cec5SDimitry Andric       break;
7490b57cec5SDimitry Andric     }
7500b57cec5SDimitry Andric   }
7510b57cec5SDimitry Andric   OS << '\"';
7520b57cec5SDimitry Andric }
7530b57cec5SDimitry Andric 
7540b57cec5SDimitry Andric void llvm::json::OStream::value(const Value &V) {
7550b57cec5SDimitry Andric   switch (V.kind()) {
7560b57cec5SDimitry Andric   case Value::Null:
7570b57cec5SDimitry Andric     valueBegin();
7580b57cec5SDimitry Andric     OS << "null";
7590b57cec5SDimitry Andric     return;
7600b57cec5SDimitry Andric   case Value::Boolean:
7610b57cec5SDimitry Andric     valueBegin();
7620b57cec5SDimitry Andric     OS << (*V.getAsBoolean() ? "true" : "false");
7630b57cec5SDimitry Andric     return;
7640b57cec5SDimitry Andric   case Value::Number:
7650b57cec5SDimitry Andric     valueBegin();
7660b57cec5SDimitry Andric     if (V.Type == Value::T_Integer)
7670b57cec5SDimitry Andric       OS << *V.getAsInteger();
768349cc55cSDimitry Andric     else if (V.Type == Value::T_UINT64)
769349cc55cSDimitry Andric       OS << *V.getAsUINT64();
7700b57cec5SDimitry Andric     else
7710b57cec5SDimitry Andric       OS << format("%.*g", std::numeric_limits<double>::max_digits10,
7720b57cec5SDimitry Andric                    *V.getAsNumber());
7730b57cec5SDimitry Andric     return;
7740b57cec5SDimitry Andric   case Value::String:
7750b57cec5SDimitry Andric     valueBegin();
7760b57cec5SDimitry Andric     quote(OS, *V.getAsString());
7770b57cec5SDimitry Andric     return;
7780b57cec5SDimitry Andric   case Value::Array:
7790b57cec5SDimitry Andric     return array([&] {
7800b57cec5SDimitry Andric       for (const Value &E : *V.getAsArray())
7810b57cec5SDimitry Andric         value(E);
7820b57cec5SDimitry Andric     });
7830b57cec5SDimitry Andric   case Value::Object:
7840b57cec5SDimitry Andric     return object([&] {
7850b57cec5SDimitry Andric       for (const Object::value_type *E : sortedElements(*V.getAsObject()))
7860b57cec5SDimitry Andric         attribute(E->first, E->second);
7870b57cec5SDimitry Andric     });
7880b57cec5SDimitry Andric   }
7890b57cec5SDimitry Andric }
7900b57cec5SDimitry Andric 
7910b57cec5SDimitry Andric void llvm::json::OStream::valueBegin() {
7920b57cec5SDimitry Andric   assert(Stack.back().Ctx != Object && "Only attributes allowed here");
7930b57cec5SDimitry Andric   if (Stack.back().HasValue) {
7940b57cec5SDimitry Andric     assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
7950b57cec5SDimitry Andric     OS << ',';
7960b57cec5SDimitry Andric   }
7970b57cec5SDimitry Andric   if (Stack.back().Ctx == Array)
7980b57cec5SDimitry Andric     newline();
799e8d8bef9SDimitry Andric   flushComment();
8000b57cec5SDimitry Andric   Stack.back().HasValue = true;
8010b57cec5SDimitry Andric }
8020b57cec5SDimitry Andric 
803e8d8bef9SDimitry Andric void OStream::comment(llvm::StringRef Comment) {
804e8d8bef9SDimitry Andric   assert(PendingComment.empty() && "Only one comment per value!");
805e8d8bef9SDimitry Andric   PendingComment = Comment;
806e8d8bef9SDimitry Andric }
807e8d8bef9SDimitry Andric 
808e8d8bef9SDimitry Andric void OStream::flushComment() {
809e8d8bef9SDimitry Andric   if (PendingComment.empty())
810e8d8bef9SDimitry Andric     return;
811e8d8bef9SDimitry Andric   OS << (IndentSize ? "/* " : "/*");
812e8d8bef9SDimitry Andric   // Be sure not to accidentally emit "*/". Transform to "* /".
813e8d8bef9SDimitry Andric   while (!PendingComment.empty()) {
814e8d8bef9SDimitry Andric     auto Pos = PendingComment.find("*/");
815e8d8bef9SDimitry Andric     if (Pos == StringRef::npos) {
816e8d8bef9SDimitry Andric       OS << PendingComment;
817e8d8bef9SDimitry Andric       PendingComment = "";
818e8d8bef9SDimitry Andric     } else {
819e8d8bef9SDimitry Andric       OS << PendingComment.take_front(Pos) << "* /";
820e8d8bef9SDimitry Andric       PendingComment = PendingComment.drop_front(Pos + 2);
821e8d8bef9SDimitry Andric     }
822e8d8bef9SDimitry Andric   }
823e8d8bef9SDimitry Andric   OS << (IndentSize ? " */" : "*/");
824e8d8bef9SDimitry Andric   // Comments are on their own line unless attached to an attribute value.
825e8d8bef9SDimitry Andric   if (Stack.size() > 1 && Stack.back().Ctx == Singleton) {
826e8d8bef9SDimitry Andric     if (IndentSize)
827e8d8bef9SDimitry Andric       OS << ' ';
828e8d8bef9SDimitry Andric   } else {
829e8d8bef9SDimitry Andric     newline();
830e8d8bef9SDimitry Andric   }
831e8d8bef9SDimitry Andric }
832e8d8bef9SDimitry Andric 
8330b57cec5SDimitry Andric void llvm::json::OStream::newline() {
8340b57cec5SDimitry Andric   if (IndentSize) {
8350b57cec5SDimitry Andric     OS.write('\n');
8360b57cec5SDimitry Andric     OS.indent(Indent);
8370b57cec5SDimitry Andric   }
8380b57cec5SDimitry Andric }
8390b57cec5SDimitry Andric 
8400b57cec5SDimitry Andric void llvm::json::OStream::arrayBegin() {
8410b57cec5SDimitry Andric   valueBegin();
8420b57cec5SDimitry Andric   Stack.emplace_back();
8430b57cec5SDimitry Andric   Stack.back().Ctx = Array;
8440b57cec5SDimitry Andric   Indent += IndentSize;
8450b57cec5SDimitry Andric   OS << '[';
8460b57cec5SDimitry Andric }
8470b57cec5SDimitry Andric 
8480b57cec5SDimitry Andric void llvm::json::OStream::arrayEnd() {
8490b57cec5SDimitry Andric   assert(Stack.back().Ctx == Array);
8500b57cec5SDimitry Andric   Indent -= IndentSize;
8510b57cec5SDimitry Andric   if (Stack.back().HasValue)
8520b57cec5SDimitry Andric     newline();
8530b57cec5SDimitry Andric   OS << ']';
854e8d8bef9SDimitry Andric   assert(PendingComment.empty());
8550b57cec5SDimitry Andric   Stack.pop_back();
8560b57cec5SDimitry Andric   assert(!Stack.empty());
8570b57cec5SDimitry Andric }
8580b57cec5SDimitry Andric 
8590b57cec5SDimitry Andric void llvm::json::OStream::objectBegin() {
8600b57cec5SDimitry Andric   valueBegin();
8610b57cec5SDimitry Andric   Stack.emplace_back();
8620b57cec5SDimitry Andric   Stack.back().Ctx = Object;
8630b57cec5SDimitry Andric   Indent += IndentSize;
8640b57cec5SDimitry Andric   OS << '{';
8650b57cec5SDimitry Andric }
8660b57cec5SDimitry Andric 
8670b57cec5SDimitry Andric void llvm::json::OStream::objectEnd() {
8680b57cec5SDimitry Andric   assert(Stack.back().Ctx == Object);
8690b57cec5SDimitry Andric   Indent -= IndentSize;
8700b57cec5SDimitry Andric   if (Stack.back().HasValue)
8710b57cec5SDimitry Andric     newline();
8720b57cec5SDimitry Andric   OS << '}';
873e8d8bef9SDimitry Andric   assert(PendingComment.empty());
8740b57cec5SDimitry Andric   Stack.pop_back();
8750b57cec5SDimitry Andric   assert(!Stack.empty());
8760b57cec5SDimitry Andric }
8770b57cec5SDimitry Andric 
8780b57cec5SDimitry Andric void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
8790b57cec5SDimitry Andric   assert(Stack.back().Ctx == Object);
8800b57cec5SDimitry Andric   if (Stack.back().HasValue)
8810b57cec5SDimitry Andric     OS << ',';
8820b57cec5SDimitry Andric   newline();
883e8d8bef9SDimitry Andric   flushComment();
8840b57cec5SDimitry Andric   Stack.back().HasValue = true;
8850b57cec5SDimitry Andric   Stack.emplace_back();
8860b57cec5SDimitry Andric   Stack.back().Ctx = Singleton;
8870b57cec5SDimitry Andric   if (LLVM_LIKELY(isUTF8(Key))) {
8880b57cec5SDimitry Andric     quote(OS, Key);
8890b57cec5SDimitry Andric   } else {
8900b57cec5SDimitry Andric     assert(false && "Invalid UTF-8 in attribute key");
8910b57cec5SDimitry Andric     quote(OS, fixUTF8(Key));
8920b57cec5SDimitry Andric   }
8930b57cec5SDimitry Andric   OS.write(':');
8940b57cec5SDimitry Andric   if (IndentSize)
8950b57cec5SDimitry Andric     OS.write(' ');
8960b57cec5SDimitry Andric }
8970b57cec5SDimitry Andric 
8980b57cec5SDimitry Andric void llvm::json::OStream::attributeEnd() {
8990b57cec5SDimitry Andric   assert(Stack.back().Ctx == Singleton);
9000b57cec5SDimitry Andric   assert(Stack.back().HasValue && "Attribute must have a value");
901e8d8bef9SDimitry Andric   assert(PendingComment.empty());
9020b57cec5SDimitry Andric   Stack.pop_back();
9030b57cec5SDimitry Andric   assert(Stack.back().Ctx == Object);
9040b57cec5SDimitry Andric }
9050b57cec5SDimitry Andric 
906e8d8bef9SDimitry Andric raw_ostream &llvm::json::OStream::rawValueBegin() {
907e8d8bef9SDimitry Andric   valueBegin();
908e8d8bef9SDimitry Andric   Stack.emplace_back();
909e8d8bef9SDimitry Andric   Stack.back().Ctx = RawValue;
910e8d8bef9SDimitry Andric   return OS;
911e8d8bef9SDimitry Andric }
912e8d8bef9SDimitry Andric 
913e8d8bef9SDimitry Andric void llvm::json::OStream::rawValueEnd() {
914e8d8bef9SDimitry Andric   assert(Stack.back().Ctx == RawValue);
915e8d8bef9SDimitry Andric   Stack.pop_back();
916e8d8bef9SDimitry Andric }
917e8d8bef9SDimitry Andric 
9180b57cec5SDimitry Andric } // namespace json
9190b57cec5SDimitry Andric } // namespace llvm
9200b57cec5SDimitry Andric 
9210b57cec5SDimitry Andric void llvm::format_provider<llvm::json::Value>::format(
9220b57cec5SDimitry Andric     const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
9230b57cec5SDimitry Andric   unsigned IndentAmount = 0;
9240b57cec5SDimitry Andric   if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
9250b57cec5SDimitry Andric     llvm_unreachable("json::Value format options should be an integer");
9260b57cec5SDimitry Andric   json::OStream(OS, IndentAmount).value(E);
9270b57cec5SDimitry Andric }
9280b57cec5SDimitry Andric 
929