1*549b59edSchristos /* $NetBSD: base64.c,v 1.3 2021/08/14 16:14:58 christos Exp $ */
24e6df137Slukem
32de962bdSlukem /* base64.c -- routines to encode/decode base64 data */
4d11b170bStron /* $OpenLDAP$ */
52de962bdSlukem /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
62de962bdSlukem *
7*549b59edSchristos * Copyright 1998-2021 The OpenLDAP Foundation.
82de962bdSlukem * Portions Copyright 1998-2003 Kurt D. Zeilenga.
92de962bdSlukem * Portions Copyright 1995 IBM Corporation.
102de962bdSlukem * All rights reserved.
112de962bdSlukem *
122de962bdSlukem * Redistribution and use in source and binary forms, with or without
132de962bdSlukem * modification, are permitted only as authorized by the OpenLDAP
142de962bdSlukem * Public License.
152de962bdSlukem *
162de962bdSlukem * A copy of this license is available in the file LICENSE in the
172de962bdSlukem * top-level directory of the distribution or, alternatively, at
182de962bdSlukem * <http://www.OpenLDAP.org/license.html>.
192de962bdSlukem */
202de962bdSlukem /* Portions Copyright (c) 1996, 1998 by Internet Software Consortium.
212de962bdSlukem *
222de962bdSlukem * Permission to use, copy, modify, and distribute this software for any
232de962bdSlukem * purpose with or without fee is hereby granted, provided that the above
242de962bdSlukem * copyright notice and this permission notice appear in all copies.
252de962bdSlukem *
262de962bdSlukem * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
272de962bdSlukem * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
282de962bdSlukem * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
292de962bdSlukem * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
302de962bdSlukem * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
312de962bdSlukem * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
322de962bdSlukem * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
332de962bdSlukem * SOFTWARE.
342de962bdSlukem */
/* This work is based upon Base64 routines (developed by IBM) found in the
 * Berkeley Internet Name Daemon (BIND) as distributed by ISC.  They
 * were adapted for inclusion in OpenLDAP Software by Kurt D. Zeilenga.
 */
392de962bdSlukem
40376af7d7Schristos #include <sys/cdefs.h>
41*549b59edSchristos __RCSID("$NetBSD: base64.c,v 1.3 2021/08/14 16:14:58 christos Exp $");
42376af7d7Schristos
432de962bdSlukem #include "portable.h"
442de962bdSlukem
452de962bdSlukem #include <ac/assert.h>
462de962bdSlukem #include <ac/stdlib.h>
472de962bdSlukem #include <ac/ctype.h>
482de962bdSlukem #include <ac/string.h>
492de962bdSlukem
502de962bdSlukem /* include socket.h to get sys/types.h and/or winsock2.h */
512de962bdSlukem #include <ac/socket.h>
522de962bdSlukem
532de962bdSlukem #include "lutil.h"
542de962bdSlukem
/*
 * The base64 alphabet: the character at index v encodes the 6-bit value v.
 * Written as three adjacent literals (upper case, lower case, digits and
 * punctuation) that the compiler joins into one 64-character string.
 */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Character used to pad the final encoded group out to four characters. */
static const char Pad64 = '=';
582de962bdSlukem
592de962bdSlukem /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
602de962bdSlukem The following encoding technique is taken from RFC 1521 by Borenstein
612de962bdSlukem and Freed. It is reproduced here in a slightly edited form for
622de962bdSlukem convenience.
632de962bdSlukem
642de962bdSlukem A 65-character subset of US-ASCII is used, enabling 6 bits to be
652de962bdSlukem represented per printable character. (The extra 65th character, "=",
662de962bdSlukem is used to signify a special processing function.)
672de962bdSlukem
682de962bdSlukem The encoding process represents 24-bit groups of input bits as output
692de962bdSlukem strings of 4 encoded characters. Proceeding from left to right, a
702de962bdSlukem 24-bit input group is formed by concatenating 3 8-bit input groups.
712de962bdSlukem These 24 bits are then treated as 4 concatenated 6-bit groups, each
722de962bdSlukem of which is translated into a single digit in the base64 alphabet.
732de962bdSlukem
742de962bdSlukem Each 6-bit group is used as an index into an array of 64 printable
752de962bdSlukem characters. The character referenced by the index is placed in the
762de962bdSlukem output string.
772de962bdSlukem
782de962bdSlukem Table 1: The Base64 Alphabet
792de962bdSlukem
802de962bdSlukem Value Encoding Value Encoding Value Encoding Value Encoding
812de962bdSlukem 0 A 17 R 34 i 51 z
822de962bdSlukem 1 B 18 S 35 j 52 0
832de962bdSlukem 2 C 19 T 36 k 53 1
842de962bdSlukem 3 D 20 U 37 l 54 2
852de962bdSlukem 4 E 21 V 38 m 55 3
862de962bdSlukem 5 F 22 W 39 n 56 4
872de962bdSlukem 6 G 23 X 40 o 57 5
882de962bdSlukem 7 H 24 Y 41 p 58 6
892de962bdSlukem 8 I 25 Z 42 q 59 7
902de962bdSlukem 9 J 26 a 43 r 60 8
912de962bdSlukem 10 K 27 b 44 s 61 9
922de962bdSlukem 11 L 28 c 45 t 62 +
932de962bdSlukem 12 M 29 d 46 u 63 /
942de962bdSlukem 13 N 30 e 47 v
952de962bdSlukem 14 O 31 f 48 w (pad) =
962de962bdSlukem 15 P 32 g 49 x
972de962bdSlukem 16 Q 33 h 50 y
982de962bdSlukem
992de962bdSlukem Special processing is performed if fewer than 24 bits are available
1002de962bdSlukem at the end of the data being encoded. A full encoding quantum is
1012de962bdSlukem always completed at the end of a quantity. When fewer than 24 input
1022de962bdSlukem bits are available in an input group, zero bits are added (on the
1032de962bdSlukem right) to form an integral number of 6-bit groups. Padding at the
1042de962bdSlukem end of the data is performed using the '=' character.
1052de962bdSlukem
1062de962bdSlukem Since all base64 input is an integral number of octets, only the
1072de962bdSlukem -------------------------------------------------
1082de962bdSlukem following cases can arise:
1092de962bdSlukem
1102de962bdSlukem (1) the final quantum of encoding input is an integral
1112de962bdSlukem multiple of 24 bits; here, the final unit of encoded
1122de962bdSlukem output will be an integral multiple of 4 characters
1132de962bdSlukem with no "=" padding,
1142de962bdSlukem (2) the final quantum of encoding input is exactly 8 bits;
1152de962bdSlukem here, the final unit of encoded output will be two
1162de962bdSlukem characters followed by two "=" padding characters, or
1172de962bdSlukem (3) the final quantum of encoding input is exactly 16 bits;
1182de962bdSlukem here, the final unit of encoded output will be three
1192de962bdSlukem characters followed by one "=" padding character.
1202de962bdSlukem */
1212de962bdSlukem
1222de962bdSlukem int
lutil_b64_ntop(u_char const * src,size_t srclength,char * target,size_t targsize)1232de962bdSlukem lutil_b64_ntop(
1242de962bdSlukem u_char const *src,
1252de962bdSlukem size_t srclength,
1262de962bdSlukem char *target,
1272de962bdSlukem size_t targsize)
1282de962bdSlukem {
1292de962bdSlukem size_t datalength = 0;
1302de962bdSlukem u_char input[3];
1312de962bdSlukem u_char output[4];
1322de962bdSlukem size_t i;
1332de962bdSlukem
1342de962bdSlukem while (2 < srclength) {
1352de962bdSlukem input[0] = *src++;
1362de962bdSlukem input[1] = *src++;
1372de962bdSlukem input[2] = *src++;
1382de962bdSlukem srclength -= 3;
1392de962bdSlukem
1402de962bdSlukem output[0] = input[0] >> 2;
1412de962bdSlukem output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
1422de962bdSlukem output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
1432de962bdSlukem output[3] = input[2] & 0x3f;
1442de962bdSlukem assert(output[0] < 64);
1452de962bdSlukem assert(output[1] < 64);
1462de962bdSlukem assert(output[2] < 64);
1472de962bdSlukem assert(output[3] < 64);
1482de962bdSlukem
1492de962bdSlukem if (datalength + 4 > targsize)
1502de962bdSlukem return (-1);
1512de962bdSlukem target[datalength++] = Base64[output[0]];
1522de962bdSlukem target[datalength++] = Base64[output[1]];
1532de962bdSlukem target[datalength++] = Base64[output[2]];
1542de962bdSlukem target[datalength++] = Base64[output[3]];
1552de962bdSlukem }
1562de962bdSlukem
1572de962bdSlukem /* Now we worry about padding. */
1582de962bdSlukem if (0 != srclength) {
1592de962bdSlukem /* Get what's left. */
1602de962bdSlukem input[0] = input[1] = input[2] = '\0';
1612de962bdSlukem for (i = 0; i < srclength; i++)
1622de962bdSlukem input[i] = *src++;
1632de962bdSlukem
1642de962bdSlukem output[0] = input[0] >> 2;
1652de962bdSlukem output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
1662de962bdSlukem output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
1672de962bdSlukem assert(output[0] < 64);
1682de962bdSlukem assert(output[1] < 64);
1692de962bdSlukem assert(output[2] < 64);
1702de962bdSlukem
1712de962bdSlukem if (datalength + 4 > targsize)
1722de962bdSlukem return (-1);
1732de962bdSlukem target[datalength++] = Base64[output[0]];
1742de962bdSlukem target[datalength++] = Base64[output[1]];
1752de962bdSlukem if (srclength == 1)
1762de962bdSlukem target[datalength++] = Pad64;
1772de962bdSlukem else
1782de962bdSlukem target[datalength++] = Base64[output[2]];
1792de962bdSlukem target[datalength++] = Pad64;
1802de962bdSlukem }
1812de962bdSlukem if (datalength >= targsize)
1822de962bdSlukem return (-1);
1832de962bdSlukem target[datalength] = '\0'; /* Returned value doesn't count \0. */
1842de962bdSlukem return (datalength);
1852de962bdSlukem }
1862de962bdSlukem
1872de962bdSlukem /* skips all whitespace anywhere.
1882de962bdSlukem converts characters, four at a time, starting at (or after)
1892de962bdSlukem src from base - 64 numbers into three 8 bit bytes in the target area.
1902de962bdSlukem it returns the number of data bytes stored at the target, or -1 on error.
1912de962bdSlukem */
1922de962bdSlukem
1932de962bdSlukem int
lutil_b64_pton(char const * src,u_char * target,size_t targsize)1942de962bdSlukem lutil_b64_pton(
1952de962bdSlukem char const *src,
1962de962bdSlukem u_char *target,
1972de962bdSlukem size_t targsize)
1982de962bdSlukem {
1992de962bdSlukem int tarindex, state, ch;
2002de962bdSlukem char *pos;
2012de962bdSlukem
2022de962bdSlukem state = 0;
2032de962bdSlukem tarindex = 0;
2042de962bdSlukem
2052de962bdSlukem while ((ch = *src++) != '\0') {
2062de962bdSlukem if (isascii(ch) && isspace(ch)) /* Skip whitespace anywhere. */
2072de962bdSlukem continue;
2082de962bdSlukem
2092de962bdSlukem if (ch == Pad64)
2102de962bdSlukem break;
2112de962bdSlukem
2122de962bdSlukem pos = strchr(Base64, ch);
2132de962bdSlukem if (pos == 0) /* A non-base64 character. */
2142de962bdSlukem return (-1);
2152de962bdSlukem
2162de962bdSlukem switch (state) {
2172de962bdSlukem case 0:
2182de962bdSlukem if (target) {
2192de962bdSlukem if ((size_t)tarindex >= targsize)
2202de962bdSlukem return (-1);
2212de962bdSlukem target[tarindex] = (pos - Base64) << 2;
2222de962bdSlukem }
2232de962bdSlukem state = 1;
2242de962bdSlukem break;
2252de962bdSlukem case 1:
2262de962bdSlukem if (target) {
2272de962bdSlukem if ((size_t)tarindex + 1 >= targsize)
2282de962bdSlukem return (-1);
2292de962bdSlukem target[tarindex] |= (pos - Base64) >> 4;
2302de962bdSlukem target[tarindex+1] = ((pos - Base64) & 0x0f)
2312de962bdSlukem << 4 ;
2322de962bdSlukem }
2332de962bdSlukem tarindex++;
2342de962bdSlukem state = 2;
2352de962bdSlukem break;
2362de962bdSlukem case 2:
2372de962bdSlukem if (target) {
2382de962bdSlukem if ((size_t)tarindex + 1 >= targsize)
2392de962bdSlukem return (-1);
2402de962bdSlukem target[tarindex] |= (pos - Base64) >> 2;
2412de962bdSlukem target[tarindex+1] = ((pos - Base64) & 0x03)
2422de962bdSlukem << 6;
2432de962bdSlukem }
2442de962bdSlukem tarindex++;
2452de962bdSlukem state = 3;
2462de962bdSlukem break;
2472de962bdSlukem case 3:
2482de962bdSlukem if (target) {
2492de962bdSlukem if ((size_t)tarindex >= targsize)
2502de962bdSlukem return (-1);
2512de962bdSlukem target[tarindex] |= (pos - Base64);
2522de962bdSlukem }
2532de962bdSlukem tarindex++;
2542de962bdSlukem state = 0;
2552de962bdSlukem break;
2562de962bdSlukem default:
2572de962bdSlukem abort();
2582de962bdSlukem }
2592de962bdSlukem }
2602de962bdSlukem
2612de962bdSlukem /*
2622de962bdSlukem * We are done decoding Base-64 chars. Let's see if we ended
2632de962bdSlukem * on a byte boundary, and/or with erroneous trailing characters.
2642de962bdSlukem */
2652de962bdSlukem
2662de962bdSlukem if (ch == Pad64) { /* We got a pad char. */
2672de962bdSlukem ch = *src++; /* Skip it, get next. */
2682de962bdSlukem switch (state) {
2692de962bdSlukem case 0: /* Invalid = in first position */
2702de962bdSlukem case 1: /* Invalid = in second position */
2712de962bdSlukem return (-1);
2722de962bdSlukem
2732de962bdSlukem case 2: /* Valid, means one byte of info */
2742de962bdSlukem /* Skip any number of spaces. */
2752de962bdSlukem for ((void)NULL; ch != '\0'; ch = *src++)
2762de962bdSlukem if (! (isascii(ch) && isspace(ch)))
2772de962bdSlukem break;
2782de962bdSlukem /* Make sure there is another trailing = sign. */
2792de962bdSlukem if (ch != Pad64)
2802de962bdSlukem return (-1);
2812de962bdSlukem ch = *src++; /* Skip the = */
2822de962bdSlukem /* Fall through to "single trailing =" case. */
2832de962bdSlukem /* FALLTHROUGH */
2842de962bdSlukem
2852de962bdSlukem case 3: /* Valid, means two bytes of info */
2862de962bdSlukem /*
2872de962bdSlukem * We know this char is an =. Is there anything but
2882de962bdSlukem * whitespace after it?
2892de962bdSlukem */
2902de962bdSlukem for ((void)NULL; ch != '\0'; ch = *src++)
2912de962bdSlukem if (! (isascii(ch) && isspace(ch)))
2922de962bdSlukem return (-1);
2932de962bdSlukem
2942de962bdSlukem /*
2952de962bdSlukem * Now make sure for cases 2 and 3 that the "extra"
2962de962bdSlukem * bits that slopped past the last full byte were
2972de962bdSlukem * zeros. If we don't check them, they become a
2982de962bdSlukem * subliminal channel.
2992de962bdSlukem */
3002de962bdSlukem if (target && target[tarindex] != 0)
3012de962bdSlukem return (-1);
3022de962bdSlukem }
3032de962bdSlukem } else {
3042de962bdSlukem /*
3052de962bdSlukem * We ended by seeing the end of the string. Make sure we
3062de962bdSlukem * have no partial bytes lying around.
3072de962bdSlukem */
3082de962bdSlukem if (state != 0)
3092de962bdSlukem return (-1);
3102de962bdSlukem }
3112de962bdSlukem
3122de962bdSlukem return (tarindex);
3132de962bdSlukem }
314