xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/Base64.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
//===- Base64.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8*bdd1243dSDimitry Andric 
9*bdd1243dSDimitry Andric #define INVALID_BASE64_BYTE 64
10*bdd1243dSDimitry Andric #include "llvm/Support/Base64.h"
11*bdd1243dSDimitry Andric 
decodeBase64Byte(uint8_t Ch)12*bdd1243dSDimitry Andric static char decodeBase64Byte(uint8_t Ch) {
13*bdd1243dSDimitry Andric   constexpr char Inv = INVALID_BASE64_BYTE;
14*bdd1243dSDimitry Andric   static const char DecodeTable[] = {
15*bdd1243dSDimitry Andric       Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
16*bdd1243dSDimitry Andric       Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
17*bdd1243dSDimitry Andric       Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
18*bdd1243dSDimitry Andric       Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
19*bdd1243dSDimitry Andric       Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
20*bdd1243dSDimitry Andric       Inv, Inv, Inv, 62,  Inv, Inv, Inv, 63,  // ...+.../
21*bdd1243dSDimitry Andric       52,  53,  54,  55,  56,  57,  58,  59,  // 01234567
22*bdd1243dSDimitry Andric       60,  61,  Inv, Inv, Inv, 0,   Inv, Inv, // 89...=..
23*bdd1243dSDimitry Andric       Inv, 0,   1,   2,   3,   4,   5,   6,   // .ABCDEFG
24*bdd1243dSDimitry Andric       7,   8,   9,   10,  11,  12,  13,  14,  // HIJKLMNO
25*bdd1243dSDimitry Andric       15,  16,  17,  18,  19,  20,  21,  22,  // PQRSTUVW
26*bdd1243dSDimitry Andric       23,  24,  25,  Inv, Inv, Inv, Inv, Inv, // XYZ.....
27*bdd1243dSDimitry Andric       Inv, 26,  27,  28,  29,  30,  31,  32,  // .abcdefg
28*bdd1243dSDimitry Andric       33,  34,  35,  36,  37,  38,  39,  40,  // hijklmno
29*bdd1243dSDimitry Andric       41,  42,  43,  44,  45,  46,  47,  48,  // pqrstuvw
30*bdd1243dSDimitry Andric       49,  50,  51                            // xyz.....
31*bdd1243dSDimitry Andric   };
32*bdd1243dSDimitry Andric   if (Ch >= sizeof(DecodeTable))
33*bdd1243dSDimitry Andric     return Inv;
34*bdd1243dSDimitry Andric   return DecodeTable[Ch];
35*bdd1243dSDimitry Andric }
36*bdd1243dSDimitry Andric 
decodeBase64(llvm::StringRef Input,std::vector<char> & Output)37*bdd1243dSDimitry Andric llvm::Error llvm::decodeBase64(llvm::StringRef Input,
38*bdd1243dSDimitry Andric                                std::vector<char> &Output) {
39*bdd1243dSDimitry Andric   constexpr char Base64InvalidByte = INVALID_BASE64_BYTE;
40*bdd1243dSDimitry Andric   // Invalid table value with short name to fit in the table init below. The
41*bdd1243dSDimitry Andric   // invalid value is 64 since valid base64 values are 0 - 63.
42*bdd1243dSDimitry Andric   Output.clear();
43*bdd1243dSDimitry Andric   const uint64_t InputLength = Input.size();
44*bdd1243dSDimitry Andric   if (InputLength == 0)
45*bdd1243dSDimitry Andric     return Error::success();
46*bdd1243dSDimitry Andric   // Make sure we have a valid input string length which must be a multiple
47*bdd1243dSDimitry Andric   // of 4.
48*bdd1243dSDimitry Andric   if ((InputLength % 4) != 0)
49*bdd1243dSDimitry Andric     return createStringError(std::errc::illegal_byte_sequence,
50*bdd1243dSDimitry Andric                              "Base64 encoded strings must be a multiple of 4 "
51*bdd1243dSDimitry Andric                              "bytes in length");
52*bdd1243dSDimitry Andric   const uint64_t FirstValidEqualIdx = InputLength - 2;
53*bdd1243dSDimitry Andric   char Hex64Bytes[4];
54*bdd1243dSDimitry Andric   for (uint64_t Idx = 0; Idx < InputLength; Idx += 4) {
55*bdd1243dSDimitry Andric     for (uint64_t ByteOffset = 0; ByteOffset < 4; ++ByteOffset) {
56*bdd1243dSDimitry Andric       const uint64_t ByteIdx = Idx + ByteOffset;
57*bdd1243dSDimitry Andric       const char Byte = Input[ByteIdx];
58*bdd1243dSDimitry Andric       const char DecodedByte = decodeBase64Byte(Byte);
59*bdd1243dSDimitry Andric       bool Illegal = DecodedByte == Base64InvalidByte;
60*bdd1243dSDimitry Andric       if (!Illegal && Byte == '=') {
61*bdd1243dSDimitry Andric         if (ByteIdx < FirstValidEqualIdx) {
62*bdd1243dSDimitry Andric           // We have an '=' in the middle of the string which is invalid, only
63*bdd1243dSDimitry Andric           // the last two characters can be '=' characters.
64*bdd1243dSDimitry Andric           Illegal = true;
65*bdd1243dSDimitry Andric         } else if (ByteIdx == FirstValidEqualIdx && Input[ByteIdx + 1] != '=') {
66*bdd1243dSDimitry Andric           // We have an equal second to last from the end and the last character
67*bdd1243dSDimitry Andric           // is not also an equal, so the '=' character is invalid
68*bdd1243dSDimitry Andric           Illegal = true;
69*bdd1243dSDimitry Andric         }
70*bdd1243dSDimitry Andric       }
71*bdd1243dSDimitry Andric       if (Illegal)
72*bdd1243dSDimitry Andric         return createStringError(
73*bdd1243dSDimitry Andric             std::errc::illegal_byte_sequence,
74*bdd1243dSDimitry Andric             "Invalid Base64 character %#2.2x at index %" PRIu64, Byte, ByteIdx);
75*bdd1243dSDimitry Andric       Hex64Bytes[ByteOffset] = DecodedByte;
76*bdd1243dSDimitry Andric     }
77*bdd1243dSDimitry Andric     // Now we have 6 bits of 3 bytes in value in each of the Hex64Bytes bytes.
78*bdd1243dSDimitry Andric     // Extract the right bytes into the Output buffer.
79*bdd1243dSDimitry Andric     Output.push_back((Hex64Bytes[0] << 2) + ((Hex64Bytes[1] >> 4) & 0x03));
80*bdd1243dSDimitry Andric     Output.push_back((Hex64Bytes[1] << 4) + ((Hex64Bytes[2] >> 2) & 0x0f));
81*bdd1243dSDimitry Andric     Output.push_back((Hex64Bytes[2] << 6) + (Hex64Bytes[3] & 0x3f));
82*bdd1243dSDimitry Andric   }
83*bdd1243dSDimitry Andric   // If we had valid trailing '=' characters strip the right number of bytes
84*bdd1243dSDimitry Andric   // from the end of the output buffer. We already know that the Input length
85*bdd1243dSDimitry Andric   // it a multiple of 4 and is not zero, so direct character access is safe.
86*bdd1243dSDimitry Andric   if (Input.back() == '=') {
87*bdd1243dSDimitry Andric     Output.pop_back();
88*bdd1243dSDimitry Andric     if (Input[InputLength - 2] == '=')
89*bdd1243dSDimitry Andric       Output.pop_back();
90*bdd1243dSDimitry Andric   }
91*bdd1243dSDimitry Andric   return Error::success();
92*bdd1243dSDimitry Andric }
93