xref: /llvm-project/llvm/unittests/Support/ConvertEBCDICTest.cpp (revision b42718dcecdd6787e0fde826ef7377f4e3cdd7bd)
//===- ConvertEBCDICTest.cpp - EBCDIC/UTF8 conversion tests ---------------===//
3*b42718dcSNeumann Hon //
4*b42718dcSNeumann Hon // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5*b42718dcSNeumann Hon // See https://llvm.org/LICENSE.txt for license information.
6*b42718dcSNeumann Hon // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7*b42718dcSNeumann Hon //
8*b42718dcSNeumann Hon //===--------------------------------------------------------------------------===//
9*b42718dcSNeumann Hon 
10*b42718dcSNeumann Hon #include "llvm/Support/ConvertEBCDIC.h"
11*b42718dcSNeumann Hon #include "llvm/ADT/SmallString.h"
12*b42718dcSNeumann Hon #include "gtest/gtest.h"
13*b42718dcSNeumann Hon using namespace llvm;
14*b42718dcSNeumann Hon 
15*b42718dcSNeumann Hon namespace {
16*b42718dcSNeumann Hon 
// Test fixtures: each pair holds the same text, once in the ASCII/UTF-8 form
// (suffix A / UTF) and once in the EBCDIC form (suffix E) that the converter
// is expected to produce or consume.

// String "Hello World!" followed by a line feed. The trailing 0x0A byte of the
// ASCII form corresponds to the trailing 0x15 byte of the EBCDIC form.
static const char HelloA[] =
    "\x48\x65\x6C\x6C\x6F\x20\x57\x6F\x72\x6C\x64\x21\x0a";
static const char HelloE[] =
    "\xC8\x85\x93\x93\x96\x40\xE6\x96\x99\x93\x84\x5A\x15";

// String "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
static const char ABCStrA[] =
    "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52"
    "\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A"
    "\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A";
static const char ABCStrE[] =
    "\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9"
    "\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91"
    "\x92\x93\x94\x95\x96\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9";

// String "¡¢£AÄÅÆEÈÉÊaàáâãäeèéêë". The UTF-8 form uses two bytes for every
// non-ASCII character (40 bytes total); the EBCDIC form uses one byte per
// character (22 bytes total).
static const char AccentUTF[] =
    "\xc2\xa1\xc2\xa2\xc2\xa3\x41\xc3\x84\xc3\x85\xc3\x86\x45\xc3\x88\xc3\x89"
    "\xc3\x8a\x61\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4\x65\xc3\xa8\xc3\xa9"
    "\xc3\xaa\xc3\xab";
static const char AccentE[] = "\xaa\x4a\xb1\xc1\x63\x67\x9e\xc5\x74\x71\x72"
                              "\x81\x44\x45\x42\x46\x43\x85\x54\x51\x52\x53";

// String with the single Cyrillic character ya (UTF-8 bytes D0 AF). It has no
// EBCDIC counterpart here because the conversion is expected to reject it.
static const char CyrillicUTF[] = "\xd0\xaf";
43*b42718dcSNeumann Hon 
// Verifies ConverterEBCDIC::convertToEBCDIC on the UTF-8 fixtures: three
// inputs must convert without error to their EBCDIC counterparts, and one
// input that cannot be represented must report illegal_byte_sequence.
TEST(CharSet, FromUTF8) {
  // Hello string.
  StringRef Src(HelloA);
  SmallString<64> Dst;

  std::error_code EC = ConverterEBCDIC::convertToEBCDIC(Src, Dst);
  EXPECT_FALSE(EC);
  // SmallString::c_str() yields a NUL-terminated view of the buffer, so no
  // temporary std::string is needed for the C-string comparison.
  EXPECT_STREQ(HelloE, Dst.c_str());
  // Dst is reused across cases; reset it before the next conversion.
  Dst.clear();

  // ABC string.
  Src = ABCStrA;
  EC = ConverterEBCDIC::convertToEBCDIC(Src, Dst);
  EXPECT_FALSE(EC);
  EXPECT_STREQ(ABCStrE, Dst.c_str());
  Dst.clear();

  // Accent string.
  Src = AccentUTF;
  EC = ConverterEBCDIC::convertToEBCDIC(Src, Dst);
  EXPECT_FALSE(EC);
  EXPECT_STREQ(AccentE, Dst.c_str());
  Dst.clear();

  // Cyrillic string. Results in error because not representable in 1047.
  // Only the error code is checked; the contents of Dst are not inspected.
  Src = CyrillicUTF;
  EC = ConverterEBCDIC::convertToEBCDIC(Src, Dst);
  EXPECT_EQ(EC, std::errc::illegal_byte_sequence);
}
74*b42718dcSNeumann Hon 
// Verifies ConverterEBCDIC::convertToUTF8 on the EBCDIC fixtures: each input
// must convert to its UTF-8 counterpart. The call's result is not checked
// here; only the produced bytes are compared.
TEST(CharSet, ToUTF8) {
  // Hello string.
  StringRef Src(HelloE);
  SmallString<64> Dst;

  ConverterEBCDIC::convertToUTF8(Src, Dst);
  // SmallString::c_str() yields a NUL-terminated view of the buffer, so no
  // temporary std::string is needed for the C-string comparison.
  EXPECT_STREQ(HelloA, Dst.c_str());
  // Dst is reused across cases; reset it before the next conversion.
  Dst.clear();

  // ABC string.
  Src = ABCStrE;
  ConverterEBCDIC::convertToUTF8(Src, Dst);
  EXPECT_STREQ(ABCStrA, Dst.c_str());
  Dst.clear();

  // Accent string.
  Src = AccentE;
  ConverterEBCDIC::convertToUTF8(Src, Dst);
  EXPECT_STREQ(AccentUTF, Dst.c_str());
}
96*b42718dcSNeumann Hon 
97*b42718dcSNeumann Hon } // namespace
98