xref: /netbsd-src/external/bsd/openldap/dist/libraries/liblutil/base64.c (revision 549b59ed3ccf0d36d3097190a0db27b770f3a839)
1*549b59edSchristos /*	$NetBSD: base64.c,v 1.3 2021/08/14 16:14:58 christos Exp $	*/
24e6df137Slukem 
32de962bdSlukem /* base64.c -- routines to encode/decode base64 data */
4d11b170bStron /* $OpenLDAP$ */
52de962bdSlukem /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
62de962bdSlukem  *
7*549b59edSchristos  * Copyright 1998-2021 The OpenLDAP Foundation.
82de962bdSlukem  * Portions Copyright 1998-2003 Kurt D. Zeilenga.
92de962bdSlukem  * Portions Copyright 1995 IBM Corporation.
102de962bdSlukem  * All rights reserved.
112de962bdSlukem  *
122de962bdSlukem  * Redistribution and use in source and binary forms, with or without
132de962bdSlukem  * modification, are permitted only as authorized by the OpenLDAP
142de962bdSlukem  * Public License.
152de962bdSlukem  *
162de962bdSlukem  * A copy of this license is available in the file LICENSE in the
172de962bdSlukem  * top-level directory of the distribution or, alternatively, at
182de962bdSlukem  * <http://www.OpenLDAP.org/license.html>.
192de962bdSlukem  */
202de962bdSlukem /* Portions Copyright (c) 1996, 1998 by Internet Software Consortium.
212de962bdSlukem  *
222de962bdSlukem  * Permission to use, copy, modify, and distribute this software for any
232de962bdSlukem  * purpose with or without fee is hereby granted, provided that the above
242de962bdSlukem  * copyright notice and this permission notice appear in all copies.
252de962bdSlukem  *
262de962bdSlukem  * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
272de962bdSlukem  * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
282de962bdSlukem  * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
292de962bdSlukem  * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
302de962bdSlukem  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
312de962bdSlukem  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
322de962bdSlukem  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
332de962bdSlukem  * SOFTWARE.
342de962bdSlukem  */
/* This work is based upon Base64 routines (developed by IBM) found in
 * Berkeley Internet Name Daemon (BIND) as distributed by ISC.  They
 * were adapted for inclusion in OpenLDAP Software by Kurt D. Zeilenga.
 */
392de962bdSlukem 
40376af7d7Schristos #include <sys/cdefs.h>
41*549b59edSchristos __RCSID("$NetBSD: base64.c,v 1.3 2021/08/14 16:14:58 christos Exp $");
42376af7d7Schristos 
432de962bdSlukem #include "portable.h"
442de962bdSlukem 
452de962bdSlukem #include <ac/assert.h>
462de962bdSlukem #include <ac/stdlib.h>
472de962bdSlukem #include <ac/ctype.h>
482de962bdSlukem #include <ac/string.h>
492de962bdSlukem 
502de962bdSlukem /* include socket.h to get sys/types.h and/or winsock2.h */
512de962bdSlukem #include <ac/socket.h>
522de962bdSlukem 
532de962bdSlukem #include "lutil.h"
542de962bdSlukem 
/*
 * The 64-character base64 alphabet, indexed by 6-bit value:
 * upper-case letters, lower-case letters, digits, then '+' and '/'.
 */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Padding character used to complete a short final group. */
static const char Pad64 = '=';
582de962bdSlukem 
592de962bdSlukem /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
602de962bdSlukem    The following encoding technique is taken from RFC 1521 by Borenstein
612de962bdSlukem    and Freed.  It is reproduced here in a slightly edited form for
622de962bdSlukem    convenience.
632de962bdSlukem 
642de962bdSlukem    A 65-character subset of US-ASCII is used, enabling 6 bits to be
652de962bdSlukem    represented per printable character. (The extra 65th character, "=",
662de962bdSlukem    is used to signify a special processing function.)
672de962bdSlukem 
682de962bdSlukem    The encoding process represents 24-bit groups of input bits as output
692de962bdSlukem    strings of 4 encoded characters. Proceeding from left to right, a
702de962bdSlukem    24-bit input group is formed by concatenating 3 8-bit input groups.
712de962bdSlukem    These 24 bits are then treated as 4 concatenated 6-bit groups, each
722de962bdSlukem    of which is translated into a single digit in the base64 alphabet.
732de962bdSlukem 
742de962bdSlukem    Each 6-bit group is used as an index into an array of 64 printable
752de962bdSlukem    characters. The character referenced by the index is placed in the
762de962bdSlukem    output string.
772de962bdSlukem 
782de962bdSlukem                          Table 1: The Base64 Alphabet
792de962bdSlukem 
802de962bdSlukem       Value Encoding  Value Encoding  Value Encoding  Value Encoding
812de962bdSlukem           0 A            17 R            34 i            51 z
822de962bdSlukem           1 B            18 S            35 j            52 0
832de962bdSlukem           2 C            19 T            36 k            53 1
842de962bdSlukem           3 D            20 U            37 l            54 2
852de962bdSlukem           4 E            21 V            38 m            55 3
862de962bdSlukem           5 F            22 W            39 n            56 4
872de962bdSlukem           6 G            23 X            40 o            57 5
882de962bdSlukem           7 H            24 Y            41 p            58 6
892de962bdSlukem           8 I            25 Z            42 q            59 7
902de962bdSlukem           9 J            26 a            43 r            60 8
912de962bdSlukem          10 K            27 b            44 s            61 9
922de962bdSlukem          11 L            28 c            45 t            62 +
932de962bdSlukem          12 M            29 d            46 u            63 /
942de962bdSlukem          13 N            30 e            47 v
952de962bdSlukem          14 O            31 f            48 w         (pad) =
962de962bdSlukem          15 P            32 g            49 x
972de962bdSlukem          16 Q            33 h            50 y
982de962bdSlukem 
992de962bdSlukem    Special processing is performed if fewer than 24 bits are available
1002de962bdSlukem    at the end of the data being encoded.  A full encoding quantum is
1012de962bdSlukem    always completed at the end of a quantity.  When fewer than 24 input
1022de962bdSlukem    bits are available in an input group, zero bits are added (on the
1032de962bdSlukem    right) to form an integral number of 6-bit groups.  Padding at the
1042de962bdSlukem    end of the data is performed using the '=' character.
1052de962bdSlukem 
1062de962bdSlukem    Since all base64 input is an integral number of octets, only the
1072de962bdSlukem          -------------------------------------------------
1082de962bdSlukem    following cases can arise:
1092de962bdSlukem 
1102de962bdSlukem        (1) the final quantum of encoding input is an integral
1112de962bdSlukem            multiple of 24 bits; here, the final unit of encoded
1122de962bdSlukem 	   output will be an integral multiple of 4 characters
1132de962bdSlukem 	   with no "=" padding,
1142de962bdSlukem        (2) the final quantum of encoding input is exactly 8 bits;
1152de962bdSlukem            here, the final unit of encoded output will be two
1162de962bdSlukem 	   characters followed by two "=" padding characters, or
1172de962bdSlukem        (3) the final quantum of encoding input is exactly 16 bits;
1182de962bdSlukem            here, the final unit of encoded output will be three
1192de962bdSlukem 	   characters followed by one "=" padding character.
1202de962bdSlukem    */
1212de962bdSlukem 
1222de962bdSlukem int
lutil_b64_ntop(u_char const * src,size_t srclength,char * target,size_t targsize)1232de962bdSlukem lutil_b64_ntop(
1242de962bdSlukem 	u_char const *src,
1252de962bdSlukem 	size_t srclength,
1262de962bdSlukem 	char *target,
1272de962bdSlukem 	size_t targsize)
1282de962bdSlukem {
1292de962bdSlukem 	size_t datalength = 0;
1302de962bdSlukem 	u_char input[3];
1312de962bdSlukem 	u_char output[4];
1322de962bdSlukem 	size_t i;
1332de962bdSlukem 
1342de962bdSlukem 	while (2 < srclength) {
1352de962bdSlukem 		input[0] = *src++;
1362de962bdSlukem 		input[1] = *src++;
1372de962bdSlukem 		input[2] = *src++;
1382de962bdSlukem 		srclength -= 3;
1392de962bdSlukem 
1402de962bdSlukem 		output[0] = input[0] >> 2;
1412de962bdSlukem 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
1422de962bdSlukem 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
1432de962bdSlukem 		output[3] = input[2] & 0x3f;
1442de962bdSlukem 		assert(output[0] < 64);
1452de962bdSlukem 		assert(output[1] < 64);
1462de962bdSlukem 		assert(output[2] < 64);
1472de962bdSlukem 		assert(output[3] < 64);
1482de962bdSlukem 
1492de962bdSlukem 		if (datalength + 4 > targsize)
1502de962bdSlukem 			return (-1);
1512de962bdSlukem 		target[datalength++] = Base64[output[0]];
1522de962bdSlukem 		target[datalength++] = Base64[output[1]];
1532de962bdSlukem 		target[datalength++] = Base64[output[2]];
1542de962bdSlukem 		target[datalength++] = Base64[output[3]];
1552de962bdSlukem 	}
1562de962bdSlukem 
1572de962bdSlukem 	/* Now we worry about padding. */
1582de962bdSlukem 	if (0 != srclength) {
1592de962bdSlukem 		/* Get what's left. */
1602de962bdSlukem 		input[0] = input[1] = input[2] = '\0';
1612de962bdSlukem 		for (i = 0; i < srclength; i++)
1622de962bdSlukem 			input[i] = *src++;
1632de962bdSlukem 
1642de962bdSlukem 		output[0] = input[0] >> 2;
1652de962bdSlukem 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
1662de962bdSlukem 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
1672de962bdSlukem 		assert(output[0] < 64);
1682de962bdSlukem 		assert(output[1] < 64);
1692de962bdSlukem 		assert(output[2] < 64);
1702de962bdSlukem 
1712de962bdSlukem 		if (datalength + 4 > targsize)
1722de962bdSlukem 			return (-1);
1732de962bdSlukem 		target[datalength++] = Base64[output[0]];
1742de962bdSlukem 		target[datalength++] = Base64[output[1]];
1752de962bdSlukem 		if (srclength == 1)
1762de962bdSlukem 			target[datalength++] = Pad64;
1772de962bdSlukem 		else
1782de962bdSlukem 			target[datalength++] = Base64[output[2]];
1792de962bdSlukem 		target[datalength++] = Pad64;
1802de962bdSlukem 	}
1812de962bdSlukem 	if (datalength >= targsize)
1822de962bdSlukem 		return (-1);
1832de962bdSlukem 	target[datalength] = '\0';	/* Returned value doesn't count \0. */
1842de962bdSlukem 	return (datalength);
1852de962bdSlukem }
1862de962bdSlukem 
1872de962bdSlukem /* skips all whitespace anywhere.
1882de962bdSlukem    converts characters, four at a time, starting at (or after)
1892de962bdSlukem    src from base - 64 numbers into three 8 bit bytes in the target area.
1902de962bdSlukem    it returns the number of data bytes stored at the target, or -1 on error.
1912de962bdSlukem  */
1922de962bdSlukem 
1932de962bdSlukem int
lutil_b64_pton(char const * src,u_char * target,size_t targsize)1942de962bdSlukem lutil_b64_pton(
1952de962bdSlukem 	char const *src,
1962de962bdSlukem 	u_char *target,
1972de962bdSlukem 	size_t targsize)
1982de962bdSlukem {
1992de962bdSlukem 	int tarindex, state, ch;
2002de962bdSlukem 	char *pos;
2012de962bdSlukem 
2022de962bdSlukem 	state = 0;
2032de962bdSlukem 	tarindex = 0;
2042de962bdSlukem 
2052de962bdSlukem 	while ((ch = *src++) != '\0') {
2062de962bdSlukem 		if (isascii(ch) && isspace(ch))	/* Skip whitespace anywhere. */
2072de962bdSlukem 			continue;
2082de962bdSlukem 
2092de962bdSlukem 		if (ch == Pad64)
2102de962bdSlukem 			break;
2112de962bdSlukem 
2122de962bdSlukem 		pos = strchr(Base64, ch);
2132de962bdSlukem 		if (pos == 0) 		/* A non-base64 character. */
2142de962bdSlukem 			return (-1);
2152de962bdSlukem 
2162de962bdSlukem 		switch (state) {
2172de962bdSlukem 		case 0:
2182de962bdSlukem 			if (target) {
2192de962bdSlukem 				if ((size_t)tarindex >= targsize)
2202de962bdSlukem 					return (-1);
2212de962bdSlukem 				target[tarindex] = (pos - Base64) << 2;
2222de962bdSlukem 			}
2232de962bdSlukem 			state = 1;
2242de962bdSlukem 			break;
2252de962bdSlukem 		case 1:
2262de962bdSlukem 			if (target) {
2272de962bdSlukem 				if ((size_t)tarindex + 1 >= targsize)
2282de962bdSlukem 					return (-1);
2292de962bdSlukem 				target[tarindex]   |=  (pos - Base64) >> 4;
2302de962bdSlukem 				target[tarindex+1]  = ((pos - Base64) & 0x0f)
2312de962bdSlukem 							<< 4 ;
2322de962bdSlukem 			}
2332de962bdSlukem 			tarindex++;
2342de962bdSlukem 			state = 2;
2352de962bdSlukem 			break;
2362de962bdSlukem 		case 2:
2372de962bdSlukem 			if (target) {
2382de962bdSlukem 				if ((size_t)tarindex + 1 >= targsize)
2392de962bdSlukem 					return (-1);
2402de962bdSlukem 				target[tarindex]   |=  (pos - Base64) >> 2;
2412de962bdSlukem 				target[tarindex+1]  = ((pos - Base64) & 0x03)
2422de962bdSlukem 							<< 6;
2432de962bdSlukem 			}
2442de962bdSlukem 			tarindex++;
2452de962bdSlukem 			state = 3;
2462de962bdSlukem 			break;
2472de962bdSlukem 		case 3:
2482de962bdSlukem 			if (target) {
2492de962bdSlukem 				if ((size_t)tarindex >= targsize)
2502de962bdSlukem 					return (-1);
2512de962bdSlukem 				target[tarindex] |= (pos - Base64);
2522de962bdSlukem 			}
2532de962bdSlukem 			tarindex++;
2542de962bdSlukem 			state = 0;
2552de962bdSlukem 			break;
2562de962bdSlukem 		default:
2572de962bdSlukem 			abort();
2582de962bdSlukem 		}
2592de962bdSlukem 	}
2602de962bdSlukem 
2612de962bdSlukem 	/*
2622de962bdSlukem 	 * We are done decoding Base-64 chars.  Let's see if we ended
2632de962bdSlukem 	 * on a byte boundary, and/or with erroneous trailing characters.
2642de962bdSlukem 	 */
2652de962bdSlukem 
2662de962bdSlukem 	if (ch == Pad64) {		/* We got a pad char. */
2672de962bdSlukem 		ch = *src++;		/* Skip it, get next. */
2682de962bdSlukem 		switch (state) {
2692de962bdSlukem 		case 0:		/* Invalid = in first position */
2702de962bdSlukem 		case 1:		/* Invalid = in second position */
2712de962bdSlukem 			return (-1);
2722de962bdSlukem 
2732de962bdSlukem 		case 2:		/* Valid, means one byte of info */
2742de962bdSlukem 			/* Skip any number of spaces. */
2752de962bdSlukem 			for ((void)NULL; ch != '\0'; ch = *src++)
2762de962bdSlukem 				if (! (isascii(ch) && isspace(ch)))
2772de962bdSlukem 					break;
2782de962bdSlukem 			/* Make sure there is another trailing = sign. */
2792de962bdSlukem 			if (ch != Pad64)
2802de962bdSlukem 				return (-1);
2812de962bdSlukem 			ch = *src++;		/* Skip the = */
2822de962bdSlukem 			/* Fall through to "single trailing =" case. */
2832de962bdSlukem 			/* FALLTHROUGH */
2842de962bdSlukem 
2852de962bdSlukem 		case 3:		/* Valid, means two bytes of info */
2862de962bdSlukem 			/*
2872de962bdSlukem 			 * We know this char is an =.  Is there anything but
2882de962bdSlukem 			 * whitespace after it?
2892de962bdSlukem 			 */
2902de962bdSlukem 			for ((void)NULL; ch != '\0'; ch = *src++)
2912de962bdSlukem 				if (! (isascii(ch) && isspace(ch)))
2922de962bdSlukem 					return (-1);
2932de962bdSlukem 
2942de962bdSlukem 			/*
2952de962bdSlukem 			 * Now make sure for cases 2 and 3 that the "extra"
2962de962bdSlukem 			 * bits that slopped past the last full byte were
2972de962bdSlukem 			 * zeros.  If we don't check them, they become a
2982de962bdSlukem 			 * subliminal channel.
2992de962bdSlukem 			 */
3002de962bdSlukem 			if (target && target[tarindex] != 0)
3012de962bdSlukem 				return (-1);
3022de962bdSlukem 		}
3032de962bdSlukem 	} else {
3042de962bdSlukem 		/*
3052de962bdSlukem 		 * We ended by seeing the end of the string.  Make sure we
3062de962bdSlukem 		 * have no partial bytes lying around.
3072de962bdSlukem 		 */
3082de962bdSlukem 		if (state != 0)
3092de962bdSlukem 			return (-1);
3102de962bdSlukem 	}
3112de962bdSlukem 
3122de962bdSlukem 	return (tarindex);
3132de962bdSlukem }
314