xref: /llvm-project/llvm/unittests/Support/JSONTest.cpp (revision 2946cd701067404b99c39fb29dc9c74bd7193eb3)
1 //===-- JSONTest.cpp - JSON unit tests --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Support/JSON.h"
10 
11 #include "gmock/gmock.h"
12 #include "gtest/gtest.h"
13 
14 namespace llvm {
15 namespace json {
16 
17 namespace {
18 
19 std::string s(const Value &E) { return llvm::formatv("{0}", E).str(); }
20 std::string sp(const Value &E) { return llvm::formatv("{0:2}", E).str(); }
21 
22 TEST(JSONTest, Types) {
23   EXPECT_EQ("true", s(true));
24   EXPECT_EQ("null", s(nullptr));
25   EXPECT_EQ("2.5", s(2.5));
26   EXPECT_EQ(R"("foo")", s("foo"));
27   EXPECT_EQ("[1,2,3]", s({1, 2, 3}));
28   EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}}));
29 
30 #ifdef NDEBUG
31   EXPECT_EQ(R"("��")", s("\xC0\x80"));
32   EXPECT_EQ(R"({"��":0})", s(Object{{"\xC0\x80", 0}}));
33 #else
34   EXPECT_DEATH(s("\xC0\x80"), "Invalid UTF-8");
35   EXPECT_DEATH(s(Object{{"\xC0\x80", 0}}), "Invalid UTF-8");
36 #endif
37 }
38 
39 TEST(JSONTest, Constructors) {
40   // Lots of edge cases around empty and singleton init lists.
41   EXPECT_EQ("[[[3]]]", s({{{3}}}));
42   EXPECT_EQ("[[[]]]", s({{{}}}));
43   EXPECT_EQ("[[{}]]", s({{Object{}}}));
44   EXPECT_EQ(R"({"A":{"B":{}}})", s(Object{{"A", Object{{"B", Object{}}}}}));
45   EXPECT_EQ(R"({"A":{"B":{"X":"Y"}}})",
46             s(Object{{"A", Object{{"B", Object{{"X", "Y"}}}}}}));
47   EXPECT_EQ("null", s(llvm::Optional<double>()));
48   EXPECT_EQ("2.5", s(llvm::Optional<double>(2.5)));
49   EXPECT_EQ("[[2.5,null]]", s(std::vector<std::vector<llvm::Optional<double>>>{
50                                  {2.5, llvm::None}}));
51 }
52 
53 TEST(JSONTest, StringOwnership) {
54   char X[] = "Hello";
55   Value Alias = static_cast<const char *>(X);
56   X[1] = 'a';
57   EXPECT_EQ(R"("Hallo")", s(Alias));
58 
59   std::string Y = "Hello";
60   Value Copy = Y;
61   Y[1] = 'a';
62   EXPECT_EQ(R"("Hello")", s(Copy));
63 }
64 
65 TEST(JSONTest, CanonicalOutput) {
66   // Objects are sorted (but arrays aren't)!
67   EXPECT_EQ(R"({"a":1,"b":2,"c":3})", s(Object{{"a", 1}, {"c", 3}, {"b", 2}}));
68   EXPECT_EQ(R"(["a","c","b"])", s({"a", "c", "b"}));
69   EXPECT_EQ("3", s(3.0));
70 }
71 
72 TEST(JSONTest, Escaping) {
73   std::string test = {
74       0,                    // Strings may contain nulls.
75       '\b',   '\f',         // Have mnemonics, but we escape numerically.
76       '\r',   '\n',   '\t', // Escaped with mnemonics.
77       'S',    '\"',   '\\', // Printable ASCII characters.
78       '\x7f',               // Delete is not escaped.
79       '\xce', '\x94',       // Non-ASCII UTF-8 is not escaped.
80   };
81 
82   std::string teststring = R"("\u0000\u0008\u000c\r\n\tS\"\\)"
83                            "\x7f\xCE\x94\"";
84 
85   EXPECT_EQ(teststring, s(test));
86 
87   EXPECT_EQ(R"({"object keys are\nescaped":true})",
88             s(Object{{"object keys are\nescaped", true}}));
89 }
90 
91 TEST(JSONTest, PrettyPrinting) {
92   const char str[] = R"({
93   "empty_array": [],
94   "empty_object": {},
95   "full_array": [
96     1,
97     null
98   ],
99   "full_object": {
100     "nested_array": [
101       {
102         "property": "value"
103       }
104     ]
105   }
106 })";
107 
108   EXPECT_EQ(str, sp(Object{
109                      {"empty_object", Object{}},
110                      {"empty_array", {}},
111                      {"full_array", {1, nullptr}},
112                      {"full_object",
113                       Object{
114                           {"nested_array",
115                            {Object{
116                                {"property", "value"},
117                            }}},
118                       }},
119                  }));
120 }
121 
122 TEST(JSONTest, Parse) {
123   auto Compare = [](llvm::StringRef S, Value Expected) {
124     if (auto E = parse(S)) {
125       // Compare both string forms and with operator==, in case we have bugs.
126       EXPECT_EQ(*E, Expected);
127       EXPECT_EQ(sp(*E), sp(Expected));
128     } else {
129       handleAllErrors(E.takeError(), [S](const llvm::ErrorInfoBase &E) {
130         FAIL() << "Failed to parse JSON >>> " << S << " <<<: " << E.message();
131       });
132     }
133   };
134 
135   Compare(R"(true)", true);
136   Compare(R"(false)", false);
137   Compare(R"(null)", nullptr);
138 
139   Compare(R"(42)", 42);
140   Compare(R"(2.5)", 2.5);
141   Compare(R"(2e50)", 2e50);
142   Compare(R"(1.2e3456789)", std::numeric_limits<double>::infinity());
143 
144   Compare(R"("foo")", "foo");
145   Compare(R"("\"\\\b\f\n\r\t")", "\"\\\b\f\n\r\t");
146   Compare(R"("\u0000")", llvm::StringRef("\0", 1));
147   Compare("\"\x7f\"", "\x7f");
148   Compare(R"("\ud801\udc37")", u8"\U00010437"); // UTF16 surrogate pair escape.
149   Compare("\"\xE2\x82\xAC\xF0\x9D\x84\x9E\"", u8"\u20ac\U0001d11e"); // UTF8
150   Compare(
151       R"("LoneLeading=\ud801, LoneTrailing=\udc01, LeadingLeadingTrailing=\ud801\ud801\udc37")",
152       u8"LoneLeading=\ufffd, LoneTrailing=\ufffd, "
153       u8"LeadingLeadingTrailing=\ufffd\U00010437"); // Invalid unicode.
154 
155   Compare(R"({"":0,"":0})", Object{{"", 0}});
156   Compare(R"({"obj":{},"arr":[]})", Object{{"obj", Object{}}, {"arr", {}}});
157   Compare(R"({"\n":{"\u0000":[[[[]]]]}})",
158           Object{{"\n", Object{
159                             {llvm::StringRef("\0", 1), {{{{}}}}},
160                         }}});
161   Compare("\r[\n\t] ", {});
162 }
163 
164 TEST(JSONTest, ParseErrors) {
165   auto ExpectErr = [](llvm::StringRef Msg, llvm::StringRef S) {
166     if (auto E = parse(S)) {
167       // Compare both string forms and with operator==, in case we have bugs.
168       FAIL() << "Parsed JSON >>> " << S << " <<< but wanted error: " << Msg;
169     } else {
170       handleAllErrors(E.takeError(), [S, Msg](const llvm::ErrorInfoBase &E) {
171         EXPECT_THAT(E.message(), testing::HasSubstr(Msg)) << S;
172       });
173     }
174   };
175   ExpectErr("Unexpected EOF", "");
176   ExpectErr("Unexpected EOF", "[");
177   ExpectErr("Text after end of document", "[][]");
178   ExpectErr("Invalid JSON value (false?)", "fuzzy");
179   ExpectErr("Expected , or ]", "[2?]");
180   ExpectErr("Expected object key", "{a:2}");
181   ExpectErr("Expected : after object key", R"({"a",2})");
182   ExpectErr("Expected , or } after object property", R"({"a":2 "b":3})");
183   ExpectErr("Invalid JSON value", R"([&%!])");
184   ExpectErr("Invalid JSON value (number?)", "1e1.0");
185   ExpectErr("Unterminated string", R"("abc\"def)");
186   ExpectErr("Control character in string", "\"abc\ndef\"");
187   ExpectErr("Invalid escape sequence", R"("\030")");
188   ExpectErr("Invalid \\u escape sequence", R"("\usuck")");
189   ExpectErr("[3:3, byte=19]", R"({
190   "valid": 1,
191   invalid: 2
192 })");
193   ExpectErr("Invalid UTF-8 sequence", "\"\xC0\x80\""); // WTF-8 null
194 }
195 
196 // Direct tests of isUTF8 and fixUTF8. Internal uses are also tested elsewhere.
197 TEST(JSONTest, UTF8) {
198   for (const char *Valid : {
199            "this is ASCII text",
200            "thïs tëxt häs BMP chäräctërs",
201            "����L���� C��������",
202        }) {
203     EXPECT_TRUE(isUTF8(Valid)) << Valid;
204     EXPECT_EQ(fixUTF8(Valid), Valid);
205   }
206   for (auto Invalid : std::vector<std::pair<const char *, const char *>>{
207            {"lone trailing \x81\x82 bytes", "lone trailing �� bytes"},
208            {"missing trailing \xD0 bytes", "missing trailing � bytes"},
209            {"truncated character \xD0", "truncated character �"},
210            {"not \xC1\x80 the \xE0\x9f\xBF shortest \xF0\x83\x83\x83 encoding",
211             "not �� the ��� shortest ���� encoding"},
212            {"too \xF9\x80\x80\x80\x80 long", "too ����� long"},
213            {"surrogate \xED\xA0\x80 invalid \xF4\x90\x80\x80",
214             "surrogate ��� invalid ����"}}) {
215     EXPECT_FALSE(isUTF8(Invalid.first)) << Invalid.first;
216     EXPECT_EQ(fixUTF8(Invalid.first), Invalid.second);
217   }
218 }
219 
220 TEST(JSONTest, Inspection) {
221   llvm::Expected<Value> Doc = parse(R"(
222     {
223       "null": null,
224       "boolean": false,
225       "number": 2.78,
226       "string": "json",
227       "array": [null, true, 3.14, "hello", [1,2,3], {"time": "arrow"}],
228       "object": {"fruit": "banana"}
229     }
230   )");
231   EXPECT_TRUE(!!Doc);
232 
233   Object *O = Doc->getAsObject();
234   ASSERT_TRUE(O);
235 
236   EXPECT_FALSE(O->getNull("missing"));
237   EXPECT_FALSE(O->getNull("boolean"));
238   EXPECT_TRUE(O->getNull("null"));
239 
240   EXPECT_EQ(O->getNumber("number"), llvm::Optional<double>(2.78));
241   EXPECT_FALSE(O->getInteger("number"));
242   EXPECT_EQ(O->getString("string"), llvm::Optional<llvm::StringRef>("json"));
243   ASSERT_FALSE(O->getObject("missing"));
244   ASSERT_FALSE(O->getObject("array"));
245   ASSERT_TRUE(O->getObject("object"));
246   EXPECT_EQ(*O->getObject("object"), (Object{{"fruit", "banana"}}));
247 
248   Array *A = O->getArray("array");
249   ASSERT_TRUE(A);
250   EXPECT_EQ((*A)[1].getAsBoolean(), llvm::Optional<bool>(true));
251   ASSERT_TRUE((*A)[4].getAsArray());
252   EXPECT_EQ(*(*A)[4].getAsArray(), (Array{1, 2, 3}));
253   EXPECT_EQ((*(*A)[4].getAsArray())[1].getAsInteger(),
254             llvm::Optional<int64_t>(2));
255   int I = 0;
256   for (Value &E : *A) {
257     if (I++ == 5) {
258       ASSERT_TRUE(E.getAsObject());
259       EXPECT_EQ(E.getAsObject()->getString("time"),
260                 llvm::Optional<llvm::StringRef>("arrow"));
261     } else
262       EXPECT_FALSE(E.getAsObject());
263   }
264 }
265 
266 // Verify special integer handling - we try to preserve exact int64 values.
267 TEST(JSONTest, Integers) {
268   struct {
269     const char *Desc;
270     Value Val;
271     const char *Str;
272     llvm::Optional<int64_t> AsInt;
273     llvm::Optional<double> AsNumber;
274   } TestCases[] = {
275       {
276           "Non-integer. Stored as double, not convertible.",
277           double{1.5},
278           "1.5",
279           llvm::None,
280           1.5,
281       },
282 
283       {
284           "Integer, not exact double. Stored as int64, convertible.",
285           int64_t{0x4000000000000001},
286           "4611686018427387905",
287           int64_t{0x4000000000000001},
288           double{0x4000000000000000},
289       },
290 
291       {
292           "Negative integer, not exact double. Stored as int64, convertible.",
293           int64_t{-0x4000000000000001},
294           "-4611686018427387905",
295           int64_t{-0x4000000000000001},
296           double{-0x4000000000000000},
297       },
298 
299       {
300           "Dynamically exact integer. Stored as double, convertible.",
301           double{0x6000000000000000},
302           "6.9175290276410819e+18",
303           int64_t{0x6000000000000000},
304           double{0x6000000000000000},
305       },
306 
307       {
308           "Dynamically integer, >64 bits. Stored as double, not convertible.",
309           1.5 * double{0x8000000000000000},
310           "1.3835058055282164e+19",
311           llvm::None,
312           1.5 * double{0x8000000000000000},
313       },
314   };
315   for (const auto &T : TestCases) {
316     EXPECT_EQ(T.Str, s(T.Val)) << T.Desc;
317     llvm::Expected<Value> Doc = parse(T.Str);
318     EXPECT_TRUE(!!Doc) << T.Desc;
319     EXPECT_EQ(Doc->getAsInteger(), T.AsInt) << T.Desc;
320     EXPECT_EQ(Doc->getAsNumber(), T.AsNumber) << T.Desc;
321     EXPECT_EQ(T.Val, *Doc) << T.Desc;
322     EXPECT_EQ(T.Str, s(*Doc)) << T.Desc;
323   }
324 }
325 
326 // Sample struct with typical JSON-mapping rules.
327 struct CustomStruct {
328   CustomStruct() : B(false) {}
329   CustomStruct(std::string S, llvm::Optional<int> I, bool B)
330       : S(S), I(I), B(B) {}
331   std::string S;
332   llvm::Optional<int> I;
333   bool B;
334 };
335 inline bool operator==(const CustomStruct &L, const CustomStruct &R) {
336   return L.S == R.S && L.I == R.I && L.B == R.B;
337 }
338 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
339                                      const CustomStruct &S) {
340   return OS << "(" << S.S << ", " << (S.I ? std::to_string(*S.I) : "None")
341             << ", " << S.B << ")";
342 }
343 bool fromJSON(const Value &E, CustomStruct &R) {
344   ObjectMapper O(E);
345   if (!O || !O.map("str", R.S) || !O.map("int", R.I))
346     return false;
347   O.map("bool", R.B);
348   return true;
349 }
350 
351 TEST(JSONTest, Deserialize) {
352   std::map<std::string, std::vector<CustomStruct>> R;
353   CustomStruct ExpectedStruct = {"foo", 42, true};
354   std::map<std::string, std::vector<CustomStruct>> Expected;
355   Value J = Object{
356       {"foo",
357        Array{
358            Object{
359                {"str", "foo"},
360                {"int", 42},
361                {"bool", true},
362                {"unknown", "ignored"},
363            },
364            Object{{"str", "bar"}},
365            Object{
366                {"str", "baz"}, {"bool", "string"}, // OK, deserialize ignores.
367            },
368        }}};
369   Expected["foo"] = {
370       CustomStruct("foo", 42, true),
371       CustomStruct("bar", llvm::None, false),
372       CustomStruct("baz", llvm::None, false),
373   };
374   ASSERT_TRUE(fromJSON(J, R));
375   EXPECT_EQ(R, Expected);
376 
377   CustomStruct V;
378   EXPECT_FALSE(fromJSON(nullptr, V)) << "Not an object " << V;
379   EXPECT_FALSE(fromJSON(Object{}, V)) << "Missing required field " << V;
380   EXPECT_FALSE(fromJSON(Object{{"str", 1}}, V)) << "Wrong type " << V;
381   // Optional<T> must parse as the correct type if present.
382   EXPECT_FALSE(fromJSON(Object{{"str", 1}, {"int", "string"}}, V))
383       << "Wrong type for Optional<T> " << V;
384 }
385 
386 } // namespace
387 } // namespace json
388 } // namespace llvm
389