//===- Base64.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8*ea9ac351SGreg Clayton
// Sentinel meaning "this byte is not a Base64 character". Valid decoded
// Base64 values are 0 - 63, so 64 can never collide with real data.
#define INVALID_BASE64_BYTE 64
10*ea9ac351SGreg Clayton #include "llvm/Support/Base64.h"
11*ea9ac351SGreg Clayton
decodeBase64Byte(uint8_t Ch)12*ea9ac351SGreg Clayton static char decodeBase64Byte(uint8_t Ch) {
13*ea9ac351SGreg Clayton constexpr char Inv = INVALID_BASE64_BYTE;
14*ea9ac351SGreg Clayton static const char DecodeTable[] = {
15*ea9ac351SGreg Clayton Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
16*ea9ac351SGreg Clayton Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
17*ea9ac351SGreg Clayton Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
18*ea9ac351SGreg Clayton Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
19*ea9ac351SGreg Clayton Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
20*ea9ac351SGreg Clayton Inv, Inv, Inv, 62, Inv, Inv, Inv, 63, // ...+.../
21*ea9ac351SGreg Clayton 52, 53, 54, 55, 56, 57, 58, 59, // 01234567
22*ea9ac351SGreg Clayton 60, 61, Inv, Inv, Inv, 0, Inv, Inv, // 89...=..
23*ea9ac351SGreg Clayton Inv, 0, 1, 2, 3, 4, 5, 6, // .ABCDEFG
24*ea9ac351SGreg Clayton 7, 8, 9, 10, 11, 12, 13, 14, // HIJKLMNO
25*ea9ac351SGreg Clayton 15, 16, 17, 18, 19, 20, 21, 22, // PQRSTUVW
26*ea9ac351SGreg Clayton 23, 24, 25, Inv, Inv, Inv, Inv, Inv, // XYZ.....
27*ea9ac351SGreg Clayton Inv, 26, 27, 28, 29, 30, 31, 32, // .abcdefg
28*ea9ac351SGreg Clayton 33, 34, 35, 36, 37, 38, 39, 40, // hijklmno
29*ea9ac351SGreg Clayton 41, 42, 43, 44, 45, 46, 47, 48, // pqrstuvw
30*ea9ac351SGreg Clayton 49, 50, 51 // xyz.....
31*ea9ac351SGreg Clayton };
32*ea9ac351SGreg Clayton if (Ch >= sizeof(DecodeTable))
33*ea9ac351SGreg Clayton return Inv;
34*ea9ac351SGreg Clayton return DecodeTable[Ch];
35*ea9ac351SGreg Clayton }
36*ea9ac351SGreg Clayton
decodeBase64(llvm::StringRef Input,std::vector<char> & Output)37*ea9ac351SGreg Clayton llvm::Error llvm::decodeBase64(llvm::StringRef Input,
38*ea9ac351SGreg Clayton std::vector<char> &Output) {
39*ea9ac351SGreg Clayton constexpr char Base64InvalidByte = INVALID_BASE64_BYTE;
40*ea9ac351SGreg Clayton // Invalid table value with short name to fit in the table init below. The
41*ea9ac351SGreg Clayton // invalid value is 64 since valid base64 values are 0 - 63.
42*ea9ac351SGreg Clayton Output.clear();
43*ea9ac351SGreg Clayton const uint64_t InputLength = Input.size();
44*ea9ac351SGreg Clayton if (InputLength == 0)
45*ea9ac351SGreg Clayton return Error::success();
46*ea9ac351SGreg Clayton // Make sure we have a valid input string length which must be a multiple
47*ea9ac351SGreg Clayton // of 4.
48*ea9ac351SGreg Clayton if ((InputLength % 4) != 0)
49*ea9ac351SGreg Clayton return createStringError(std::errc::illegal_byte_sequence,
50*ea9ac351SGreg Clayton "Base64 encoded strings must be a multiple of 4 "
51*ea9ac351SGreg Clayton "bytes in length");
52*ea9ac351SGreg Clayton const uint64_t FirstValidEqualIdx = InputLength - 2;
53*ea9ac351SGreg Clayton char Hex64Bytes[4];
54*ea9ac351SGreg Clayton for (uint64_t Idx = 0; Idx < InputLength; Idx += 4) {
55*ea9ac351SGreg Clayton for (uint64_t ByteOffset = 0; ByteOffset < 4; ++ByteOffset) {
56*ea9ac351SGreg Clayton const uint64_t ByteIdx = Idx + ByteOffset;
57*ea9ac351SGreg Clayton const char Byte = Input[ByteIdx];
58*ea9ac351SGreg Clayton const char DecodedByte = decodeBase64Byte(Byte);
59*ea9ac351SGreg Clayton bool Illegal = DecodedByte == Base64InvalidByte;
60*ea9ac351SGreg Clayton if (!Illegal && Byte == '=') {
61*ea9ac351SGreg Clayton if (ByteIdx < FirstValidEqualIdx) {
62*ea9ac351SGreg Clayton // We have an '=' in the middle of the string which is invalid, only
63*ea9ac351SGreg Clayton // the last two characters can be '=' characters.
64*ea9ac351SGreg Clayton Illegal = true;
65*ea9ac351SGreg Clayton } else if (ByteIdx == FirstValidEqualIdx && Input[ByteIdx + 1] != '=') {
66*ea9ac351SGreg Clayton // We have an equal second to last from the end and the last character
67*ea9ac351SGreg Clayton // is not also an equal, so the '=' character is invalid
68*ea9ac351SGreg Clayton Illegal = true;
69*ea9ac351SGreg Clayton }
70*ea9ac351SGreg Clayton }
71*ea9ac351SGreg Clayton if (Illegal)
72*ea9ac351SGreg Clayton return createStringError(
73*ea9ac351SGreg Clayton std::errc::illegal_byte_sequence,
74*ea9ac351SGreg Clayton "Invalid Base64 character %#2.2x at index %" PRIu64, Byte, ByteIdx);
75*ea9ac351SGreg Clayton Hex64Bytes[ByteOffset] = DecodedByte;
76*ea9ac351SGreg Clayton }
77*ea9ac351SGreg Clayton // Now we have 6 bits of 3 bytes in value in each of the Hex64Bytes bytes.
78*ea9ac351SGreg Clayton // Extract the right bytes into the Output buffer.
79*ea9ac351SGreg Clayton Output.push_back((Hex64Bytes[0] << 2) + ((Hex64Bytes[1] >> 4) & 0x03));
80*ea9ac351SGreg Clayton Output.push_back((Hex64Bytes[1] << 4) + ((Hex64Bytes[2] >> 2) & 0x0f));
81*ea9ac351SGreg Clayton Output.push_back((Hex64Bytes[2] << 6) + (Hex64Bytes[3] & 0x3f));
82*ea9ac351SGreg Clayton }
83*ea9ac351SGreg Clayton // If we had valid trailing '=' characters strip the right number of bytes
84*ea9ac351SGreg Clayton // from the end of the output buffer. We already know that the Input length
85*ea9ac351SGreg Clayton // it a multiple of 4 and is not zero, so direct character access is safe.
86*ea9ac351SGreg Clayton if (Input.back() == '=') {
87*ea9ac351SGreg Clayton Output.pop_back();
88*ea9ac351SGreg Clayton if (Input[InputLength - 2] == '=')
89*ea9ac351SGreg Clayton Output.pop_back();
90*ea9ac351SGreg Clayton }
91*ea9ac351SGreg Clayton return Error::success();
92*ea9ac351SGreg Clayton }
93