1 //===-- JSONTest.cpp - JSON unit tests --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Support/JSON.h" 10 11 #include "gmock/gmock.h" 12 #include "gtest/gtest.h" 13 14 namespace llvm { 15 namespace json { 16 17 namespace { 18 19 std::string s(const Value &E) { return llvm::formatv("{0}", E).str(); } 20 std::string sp(const Value &E) { return llvm::formatv("{0:2}", E).str(); } 21 22 TEST(JSONTest, Types) { 23 EXPECT_EQ("true", s(true)); 24 EXPECT_EQ("null", s(nullptr)); 25 EXPECT_EQ("2.5", s(2.5)); 26 EXPECT_EQ(R"("foo")", s("foo")); 27 EXPECT_EQ("[1,2,3]", s({1, 2, 3})); 28 EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}})); 29 30 #ifdef NDEBUG 31 EXPECT_EQ(R"("��")", s("\xC0\x80")); 32 EXPECT_EQ(R"({"��":0})", s(Object{{"\xC0\x80", 0}})); 33 #else 34 EXPECT_DEATH(s("\xC0\x80"), "Invalid UTF-8"); 35 EXPECT_DEATH(s(Object{{"\xC0\x80", 0}}), "Invalid UTF-8"); 36 #endif 37 } 38 39 TEST(JSONTest, Constructors) { 40 // Lots of edge cases around empty and singleton init lists. 41 EXPECT_EQ("[[[3]]]", s({{{3}}})); 42 EXPECT_EQ("[[[]]]", s({{{}}})); 43 EXPECT_EQ("[[{}]]", s({{Object{}}})); 44 EXPECT_EQ(R"({"A":{"B":{}}})", s(Object{{"A", Object{{"B", Object{}}}}})); 45 EXPECT_EQ(R"({"A":{"B":{"X":"Y"}}})", 46 s(Object{{"A", Object{{"B", Object{{"X", "Y"}}}}}})); 47 EXPECT_EQ("null", s(llvm::Optional<double>())); 48 EXPECT_EQ("2.5", s(llvm::Optional<double>(2.5))); 49 EXPECT_EQ("[[2.5,null]]", s(std::vector<std::vector<llvm::Optional<double>>>{ 50 {2.5, llvm::None}})); 51 } 52 53 TEST(JSONTest, StringOwnership) { 54 char X[] = "Hello"; 55 Value Alias = static_cast<const char *>(X); 56 X[1] = 'a'; 57 EXPECT_EQ(R"("Hallo")", s(Alias)); 58 59 std::string Y = "Hello"; 60 Value Copy = Y; 61 Y[1] = 'a'; 62 EXPECT_EQ(R"("Hello")", s(Copy)); 63 } 64 65 TEST(JSONTest, CanonicalOutput) { 66 // Objects are sorted (but arrays aren't)! 67 EXPECT_EQ(R"({"a":1,"b":2,"c":3})", s(Object{{"a", 1}, {"c", 3}, {"b", 2}})); 68 EXPECT_EQ(R"(["a","c","b"])", s({"a", "c", "b"})); 69 EXPECT_EQ("3", s(3.0)); 70 } 71 72 TEST(JSONTest, Escaping) { 73 std::string test = { 74 0, // Strings may contain nulls. 75 '\b', '\f', // Have mnemonics, but we escape numerically. 76 '\r', '\n', '\t', // Escaped with mnemonics. 77 'S', '\"', '\\', // Printable ASCII characters. 78 '\x7f', // Delete is not escaped. 79 '\xce', '\x94', // Non-ASCII UTF-8 is not escaped. 80 }; 81 82 std::string teststring = R"("\u0000\u0008\u000c\r\n\tS\"\\)" 83 "\x7f\xCE\x94\""; 84 85 EXPECT_EQ(teststring, s(test)); 86 87 EXPECT_EQ(R"({"object keys are\nescaped":true})", 88 s(Object{{"object keys are\nescaped", true}})); 89 } 90 91 TEST(JSONTest, PrettyPrinting) { 92 const char str[] = R"({ 93 "empty_array": [], 94 "empty_object": {}, 95 "full_array": [ 96 1, 97 null 98 ], 99 "full_object": { 100 "nested_array": [ 101 { 102 "property": "value" 103 } 104 ] 105 } 106 })"; 107 108 EXPECT_EQ(str, sp(Object{ 109 {"empty_object", Object{}}, 110 {"empty_array", {}}, 111 {"full_array", {1, nullptr}}, 112 {"full_object", 113 Object{ 114 {"nested_array", 115 {Object{ 116 {"property", "value"}, 117 }}}, 118 }}, 119 })); 120 } 121 122 TEST(JSONTest, Parse) { 123 auto Compare = [](llvm::StringRef S, Value Expected) { 124 if (auto E = parse(S)) { 125 // Compare both string forms and with operator==, in case we have bugs. 126 EXPECT_EQ(*E, Expected); 127 EXPECT_EQ(sp(*E), sp(Expected)); 128 } else { 129 handleAllErrors(E.takeError(), [S](const llvm::ErrorInfoBase &E) { 130 FAIL() << "Failed to parse JSON >>> " << S << " <<<: " << E.message(); 131 }); 132 } 133 }; 134 135 Compare(R"(true)", true); 136 Compare(R"(false)", false); 137 Compare(R"(null)", nullptr); 138 139 Compare(R"(42)", 42); 140 Compare(R"(2.5)", 2.5); 141 Compare(R"(2e50)", 2e50); 142 Compare(R"(1.2e3456789)", std::numeric_limits<double>::infinity()); 143 144 Compare(R"("foo")", "foo"); 145 Compare(R"("\"\\\b\f\n\r\t")", "\"\\\b\f\n\r\t"); 146 Compare(R"("\u0000")", llvm::StringRef("\0", 1)); 147 Compare("\"\x7f\"", "\x7f"); 148 Compare(R"("\ud801\udc37")", u8"\U00010437"); // UTF16 surrogate pair escape. 149 Compare("\"\xE2\x82\xAC\xF0\x9D\x84\x9E\"", u8"\u20ac\U0001d11e"); // UTF8 150 Compare( 151 R"("LoneLeading=\ud801, LoneTrailing=\udc01, LeadingLeadingTrailing=\ud801\ud801\udc37")", 152 u8"LoneLeading=\ufffd, LoneTrailing=\ufffd, " 153 u8"LeadingLeadingTrailing=\ufffd\U00010437"); // Invalid unicode. 154 155 Compare(R"({"":0,"":0})", Object{{"", 0}}); 156 Compare(R"({"obj":{},"arr":[]})", Object{{"obj", Object{}}, {"arr", {}}}); 157 Compare(R"({"\n":{"\u0000":[[[[]]]]}})", 158 Object{{"\n", Object{ 159 {llvm::StringRef("\0", 1), {{{{}}}}}, 160 }}}); 161 Compare("\r[\n\t] ", {}); 162 } 163 164 TEST(JSONTest, ParseErrors) { 165 auto ExpectErr = [](llvm::StringRef Msg, llvm::StringRef S) { 166 if (auto E = parse(S)) { 167 // Compare both string forms and with operator==, in case we have bugs. 168 FAIL() << "Parsed JSON >>> " << S << " <<< but wanted error: " << Msg; 169 } else { 170 handleAllErrors(E.takeError(), [S, Msg](const llvm::ErrorInfoBase &E) { 171 EXPECT_THAT(E.message(), testing::HasSubstr(Msg)) << S; 172 }); 173 } 174 }; 175 ExpectErr("Unexpected EOF", ""); 176 ExpectErr("Unexpected EOF", "["); 177 ExpectErr("Text after end of document", "[][]"); 178 ExpectErr("Invalid JSON value (false?)", "fuzzy"); 179 ExpectErr("Expected , or ]", "[2?]"); 180 ExpectErr("Expected object key", "{a:2}"); 181 ExpectErr("Expected : after object key", R"({"a",2})"); 182 ExpectErr("Expected , or } after object property", R"({"a":2 "b":3})"); 183 ExpectErr("Invalid JSON value", R"([&%!])"); 184 ExpectErr("Invalid JSON value (number?)", "1e1.0"); 185 ExpectErr("Unterminated string", R"("abc\"def)"); 186 ExpectErr("Control character in string", "\"abc\ndef\""); 187 ExpectErr("Invalid escape sequence", R"("\030")"); 188 ExpectErr("Invalid \\u escape sequence", R"("\usuck")"); 189 ExpectErr("[3:3, byte=19]", R"({ 190 "valid": 1, 191 invalid: 2 192 })"); 193 ExpectErr("Invalid UTF-8 sequence", "\"\xC0\x80\""); // WTF-8 null 194 } 195 196 // Direct tests of isUTF8 and fixUTF8. Internal uses are also tested elsewhere. 197 TEST(JSONTest, UTF8) { 198 for (const char *Valid : { 199 "this is ASCII text", 200 "thïs tëxt häs BMP chäräctërs", 201 "L C", 202 }) { 203 EXPECT_TRUE(isUTF8(Valid)) << Valid; 204 EXPECT_EQ(fixUTF8(Valid), Valid); 205 } 206 for (auto Invalid : std::vector<std::pair<const char *, const char *>>{ 207 {"lone trailing \x81\x82 bytes", "lone trailing �� bytes"}, 208 {"missing trailing \xD0 bytes", "missing trailing � bytes"}, 209 {"truncated character \xD0", "truncated character �"}, 210 {"not \xC1\x80 the \xE0\x9f\xBF shortest \xF0\x83\x83\x83 encoding", 211 "not �� the ��� shortest ���� encoding"}, 212 {"too \xF9\x80\x80\x80\x80 long", "too ����� long"}, 213 {"surrogate \xED\xA0\x80 invalid \xF4\x90\x80\x80", 214 "surrogate ��� invalid ����"}}) { 215 EXPECT_FALSE(isUTF8(Invalid.first)) << Invalid.first; 216 EXPECT_EQ(fixUTF8(Invalid.first), Invalid.second); 217 } 218 } 219 220 TEST(JSONTest, Inspection) { 221 llvm::Expected<Value> Doc = parse(R"( 222 { 223 "null": null, 224 "boolean": false, 225 "number": 2.78, 226 "string": "json", 227 "array": [null, true, 3.14, "hello", [1,2,3], {"time": "arrow"}], 228 "object": {"fruit": "banana"} 229 } 230 )"); 231 EXPECT_TRUE(!!Doc); 232 233 Object *O = Doc->getAsObject(); 234 ASSERT_TRUE(O); 235 236 EXPECT_FALSE(O->getNull("missing")); 237 EXPECT_FALSE(O->getNull("boolean")); 238 EXPECT_TRUE(O->getNull("null")); 239 240 EXPECT_EQ(O->getNumber("number"), llvm::Optional<double>(2.78)); 241 EXPECT_FALSE(O->getInteger("number")); 242 EXPECT_EQ(O->getString("string"), llvm::Optional<llvm::StringRef>("json")); 243 ASSERT_FALSE(O->getObject("missing")); 244 ASSERT_FALSE(O->getObject("array")); 245 ASSERT_TRUE(O->getObject("object")); 246 EXPECT_EQ(*O->getObject("object"), (Object{{"fruit", "banana"}})); 247 248 Array *A = O->getArray("array"); 249 ASSERT_TRUE(A); 250 EXPECT_EQ((*A)[1].getAsBoolean(), llvm::Optional<bool>(true)); 251 ASSERT_TRUE((*A)[4].getAsArray()); 252 EXPECT_EQ(*(*A)[4].getAsArray(), (Array{1, 2, 3})); 253 EXPECT_EQ((*(*A)[4].getAsArray())[1].getAsInteger(), 254 llvm::Optional<int64_t>(2)); 255 int I = 0; 256 for (Value &E : *A) { 257 if (I++ == 5) { 258 ASSERT_TRUE(E.getAsObject()); 259 EXPECT_EQ(E.getAsObject()->getString("time"), 260 llvm::Optional<llvm::StringRef>("arrow")); 261 } else 262 EXPECT_FALSE(E.getAsObject()); 263 } 264 } 265 266 // Verify special integer handling - we try to preserve exact int64 values. 267 TEST(JSONTest, Integers) { 268 struct { 269 const char *Desc; 270 Value Val; 271 const char *Str; 272 llvm::Optional<int64_t> AsInt; 273 llvm::Optional<double> AsNumber; 274 } TestCases[] = { 275 { 276 "Non-integer. Stored as double, not convertible.", 277 double{1.5}, 278 "1.5", 279 llvm::None, 280 1.5, 281 }, 282 283 { 284 "Integer, not exact double. Stored as int64, convertible.", 285 int64_t{0x4000000000000001}, 286 "4611686018427387905", 287 int64_t{0x4000000000000001}, 288 double{0x4000000000000000}, 289 }, 290 291 { 292 "Negative integer, not exact double. Stored as int64, convertible.", 293 int64_t{-0x4000000000000001}, 294 "-4611686018427387905", 295 int64_t{-0x4000000000000001}, 296 double{-0x4000000000000000}, 297 }, 298 299 { 300 "Dynamically exact integer. Stored as double, convertible.", 301 double{0x6000000000000000}, 302 "6.9175290276410819e+18", 303 int64_t{0x6000000000000000}, 304 double{0x6000000000000000}, 305 }, 306 307 { 308 "Dynamically integer, >64 bits. Stored as double, not convertible.", 309 1.5 * double{0x8000000000000000}, 310 "1.3835058055282164e+19", 311 llvm::None, 312 1.5 * double{0x8000000000000000}, 313 }, 314 }; 315 for (const auto &T : TestCases) { 316 EXPECT_EQ(T.Str, s(T.Val)) << T.Desc; 317 llvm::Expected<Value> Doc = parse(T.Str); 318 EXPECT_TRUE(!!Doc) << T.Desc; 319 EXPECT_EQ(Doc->getAsInteger(), T.AsInt) << T.Desc; 320 EXPECT_EQ(Doc->getAsNumber(), T.AsNumber) << T.Desc; 321 EXPECT_EQ(T.Val, *Doc) << T.Desc; 322 EXPECT_EQ(T.Str, s(*Doc)) << T.Desc; 323 } 324 } 325 326 // Sample struct with typical JSON-mapping rules. 327 struct CustomStruct { 328 CustomStruct() : B(false) {} 329 CustomStruct(std::string S, llvm::Optional<int> I, bool B) 330 : S(S), I(I), B(B) {} 331 std::string S; 332 llvm::Optional<int> I; 333 bool B; 334 }; 335 inline bool operator==(const CustomStruct &L, const CustomStruct &R) { 336 return L.S == R.S && L.I == R.I && L.B == R.B; 337 } 338 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, 339 const CustomStruct &S) { 340 return OS << "(" << S.S << ", " << (S.I ? std::to_string(*S.I) : "None") 341 << ", " << S.B << ")"; 342 } 343 bool fromJSON(const Value &E, CustomStruct &R) { 344 ObjectMapper O(E); 345 if (!O || !O.map("str", R.S) || !O.map("int", R.I)) 346 return false; 347 O.map("bool", R.B); 348 return true; 349 } 350 351 TEST(JSONTest, Deserialize) { 352 std::map<std::string, std::vector<CustomStruct>> R; 353 CustomStruct ExpectedStruct = {"foo", 42, true}; 354 std::map<std::string, std::vector<CustomStruct>> Expected; 355 Value J = Object{ 356 {"foo", 357 Array{ 358 Object{ 359 {"str", "foo"}, 360 {"int", 42}, 361 {"bool", true}, 362 {"unknown", "ignored"}, 363 }, 364 Object{{"str", "bar"}}, 365 Object{ 366 {"str", "baz"}, {"bool", "string"}, // OK, deserialize ignores. 367 }, 368 }}}; 369 Expected["foo"] = { 370 CustomStruct("foo", 42, true), 371 CustomStruct("bar", llvm::None, false), 372 CustomStruct("baz", llvm::None, false), 373 }; 374 ASSERT_TRUE(fromJSON(J, R)); 375 EXPECT_EQ(R, Expected); 376 377 CustomStruct V; 378 EXPECT_FALSE(fromJSON(nullptr, V)) << "Not an object " << V; 379 EXPECT_FALSE(fromJSON(Object{}, V)) << "Missing required field " << V; 380 EXPECT_FALSE(fromJSON(Object{{"str", 1}}, V)) << "Wrong type " << V; 381 // Optional<T> must parse as the correct type if present. 382 EXPECT_FALSE(fromJSON(Object{{"str", 1}, {"int", "string"}}, V)) 383 << "Wrong type for Optional<T> " << V; 384 } 385 386 } // namespace 387 } // namespace json 388 } // namespace llvm 389