/*	$NetBSD: base64.c,v 1.17 2024/01/20 14:52:48 christos Exp $	*/

/*
 * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
 * Copyright (c) 1996-1999 by Internet Software Consortium.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Portions Copyright (c) 1995 by International Business Machines, Inc.
 *
 * International Business Machines, Inc. (hereinafter called IBM) grants
 * permission under its copyrights to use, copy, modify, and distribute this
 * Software with or without fee, provided that the above copyright notice and
 * all paragraphs of this notice appear in all copies, and that the name of IBM
 * not be used in connection with the marketing of any product incorporating
 * the Software or modifications thereof, without specific, written prior
 * permission.
 *
 * To the extent it has a right to do so, IBM grants an immunity from suit
 * under its patents, if any, for the use, sale or manufacture of products to
 * the extent that such products are used for performing Domain Name System
 * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
 * granted for any product per se or for any other function of any product.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
 * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
 * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
 */

#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static const char rcsid[] = "Id: base64.c,v 1.4 2005/04/27 04:56:34 sra Exp";
#else
__RCSID("$NetBSD: base64.c,v 1.17 2024/01/20 14:52:48 christos Exp $");
#endif
#endif /* LIBC_SCCS and not lint */

#include "port_before.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>

#include <netinet/in.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <resolv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "port_after.h"

/*
 * Abort the process when an internal invariant does not hold.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: `if (x) Assert(y); else ...' then pairs the `else' with
 * the caller's `if' instead of with the `if' hidden inside the macro.
 */
#define Assert(Cond) do { if (!(Cond)) abort(); } while (0)

/* The 64 digits of the base64 alphabet, indexed by their 6-bit value. */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Character used to pad the final group of output to four characters. */
static const char Pad64 = '=';

/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
   The following encoding technique is taken from RFC1521 by Borenstein
   and Freed.  It is reproduced here in a slightly edited form for
   convenience.

   A 65-character subset of US-ASCII is used, enabling 6 bits to be
   represented per printable character. (The extra 65th character, "=",
   is used to signify a special processing function.)

   The encoding process represents 24-bit groups of input bits as output
   strings of 4 encoded characters. Proceeding from left to right, a
   24-bit input group is formed by concatenating 3 8-bit input groups.
   These 24 bits are then treated as 4 concatenated 6-bit groups, each
   of which is translated into a single digit in the base64 alphabet.

   Each 6-bit group is used as an index into an array of 64 printable
   characters. The character referenced by the index is placed in the
   output string.

                         Table 1: The Base64 Alphabet

      Value Encoding  Value Encoding  Value Encoding  Value Encoding
          0 A            17 R            34 i            51 z
          1 B            18 S            35 j            52 0
          2 C            19 T            36 k            53 1
          3 D            20 U            37 l            54 2
          4 E            21 V            38 m            55 3
          5 F            22 W            39 n            56 4
          6 G            23 X            40 o            57 5
          7 H            24 Y            41 p            58 6
          8 I            25 Z            42 q            59 7
          9 J            26 a            43 r            60 8
         10 K            27 b            44 s            61 9
         11 L            28 c            45 t            62 +
         12 M            29 d            46 u            63 /
         13 N            30 e            47 v
         14 O            31 f            48 w         (pad) =
         15 P            32 g            49 x
         16 Q            33 h            50 y

   Special processing is performed if fewer than 24 bits are available
   at the end of the data being encoded.  A full encoding quantum is
   always completed at the end of a quantity.  When fewer than 24 input
   bits are available in an input group, zero bits are added (on the
   right) to form an integral number of 6-bit groups.  Padding at the
   end of the data is performed using the '=' character.

   Since all base64 input is an integral number of octets, only the
         -------------------------------------------------
   following cases can arise:

       (1) the final quantum of encoding input is an integral
           multiple of 24 bits; here, the final unit of encoded
	   output will be an integral multiple of 4 characters
	   with no "=" padding,
       (2) the final quantum of encoding input is exactly 8 bits;
           here, the final unit of encoded output will be two
	   characters followed by two "=" padding characters, or
       (3) the final quantum of encoding input is exactly 16 bits;
           here, the final unit of encoded output will be three
	   characters followed by one "=" padding character.
   */

1422479c1a2Smrg int
b64_ntop(u_char const * src,size_t srclength,char * target,size_t targsize)143d73eb73dSchristos b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize) {
1442479c1a2Smrg 	size_t datalength = 0;
1452479c1a2Smrg 	u_char input[3];
1462479c1a2Smrg 	u_char output[4];
14720b98814Sthorpej 	size_t i;
1482479c1a2Smrg 
149b48252f3Slukem 	_DIAGASSERT(src != NULL);
150b48252f3Slukem 	_DIAGASSERT(target != NULL);
151b48252f3Slukem 
152d73eb73dSchristos 	while (2U < srclength) {
1532479c1a2Smrg 		input[0] = *src++;
1542479c1a2Smrg 		input[1] = *src++;
1552479c1a2Smrg 		input[2] = *src++;
1562479c1a2Smrg 		srclength -= 3;
1572479c1a2Smrg 
15894783bbfSchristos 		output[0] = (uint32_t)input[0] >> 2;
15994783bbfSchristos 		output[1] = ((uint32_t)(input[0] & 0x03) << 4) +
16094783bbfSchristos 		    ((uint32_t)input[1] >> 4);
16194783bbfSchristos 		output[2] = ((uint32_t)(input[1] & 0x0f) << 2) +
16294783bbfSchristos 		    ((uint32_t)input[2] >> 6);
1632479c1a2Smrg 		output[3] = input[2] & 0x3f;
1642479c1a2Smrg 		Assert(output[0] < 64);
1652479c1a2Smrg 		Assert(output[1] < 64);
1662479c1a2Smrg 		Assert(output[2] < 64);
1672479c1a2Smrg 		Assert(output[3] < 64);
1682479c1a2Smrg 
1692479c1a2Smrg 		if (datalength + 4 > targsize)
17094783bbfSchristos 			return -1;
1712479c1a2Smrg 		target[datalength++] = Base64[output[0]];
1722479c1a2Smrg 		target[datalength++] = Base64[output[1]];
1732479c1a2Smrg 		target[datalength++] = Base64[output[2]];
1742479c1a2Smrg 		target[datalength++] = Base64[output[3]];
1752479c1a2Smrg 	}
1762479c1a2Smrg 
1772479c1a2Smrg 	/* Now we worry about padding. */
178d73eb73dSchristos 	if (0U != srclength) {
1792479c1a2Smrg 		/* Get what's left. */
1802479c1a2Smrg 		input[0] = input[1] = input[2] = '\0';
1812479c1a2Smrg 		for (i = 0; i < srclength; i++)
1822479c1a2Smrg 			input[i] = *src++;
1832479c1a2Smrg 
18494783bbfSchristos 		output[0] = (uint32_t)input[0] >> 2;
18594783bbfSchristos 		output[1] = ((uint32_t)(input[0] & 0x03) << 4) +
18694783bbfSchristos 		    ((uint32_t)input[1] >> 4);
18794783bbfSchristos 		output[2] = ((uint32_t)(input[1] & 0x0f) << 2) +
18894783bbfSchristos 		    ((uint32_t)input[2] >> 6);
1892479c1a2Smrg 		Assert(output[0] < 64);
1902479c1a2Smrg 		Assert(output[1] < 64);
1912479c1a2Smrg 		Assert(output[2] < 64);
1922479c1a2Smrg 
1932479c1a2Smrg 		if (datalength + 4 > targsize)
19494783bbfSchristos 			return -1;
1952479c1a2Smrg 		target[datalength++] = Base64[output[0]];
1962479c1a2Smrg 		target[datalength++] = Base64[output[1]];
197d73eb73dSchristos 		if (srclength == 1U)
1982479c1a2Smrg 			target[datalength++] = Pad64;
1992479c1a2Smrg 		else
2002479c1a2Smrg 			target[datalength++] = Base64[output[2]];
2012479c1a2Smrg 		target[datalength++] = Pad64;
2022479c1a2Smrg 	}
2032479c1a2Smrg 	if (datalength >= targsize)
20494783bbfSchristos 		return -1;
205d73eb73dSchristos 	target[datalength] = '\0';	/*%< Returned value doesn't count \\0. */
206c5e820caSchristos 	_DIAGASSERT(__type_fit(int, datalength));
207c5e820caSchristos 	return (int)datalength;
2082479c1a2Smrg }
2092479c1a2Smrg 
2102479c1a2Smrg /* skips all whitespace anywhere.
2112479c1a2Smrg    converts characters, four at a time, starting at (or after)
2122479c1a2Smrg    src from base - 64 numbers into three 8 bit bytes in the target area.
2132479c1a2Smrg    it returns the number of data bytes stored at the target, or -1 on error.
2142479c1a2Smrg  */
2152479c1a2Smrg 
2162479c1a2Smrg int
b64_pton(char const * src,u_char * target,size_t targsize)2179e66e6d7Sabs b64_pton(char const *src, u_char *target, size_t targsize)
2182479c1a2Smrg {
21920b98814Sthorpej 	size_t tarindex;
22020b98814Sthorpej 	int state, ch;
2218a88bf6aSchristos 	u_char nextbyte;
222*ace5b9b5Schristos 	const char *pos;
2232479c1a2Smrg 
224b48252f3Slukem 	_DIAGASSERT(src != NULL);
225b48252f3Slukem 	_DIAGASSERT(target != NULL);
226b48252f3Slukem 
2272479c1a2Smrg 	state = 0;
2282479c1a2Smrg 	tarindex = 0;
2292479c1a2Smrg 
2307daefc5aSitohy 	while ((ch = (u_char) *src++) != '\0') {
231d73eb73dSchristos 		if (isspace(ch))	/*%< Skip whitespace anywhere. */
2322479c1a2Smrg 			continue;
2332479c1a2Smrg 
2342479c1a2Smrg 		if (ch == Pad64)
2352479c1a2Smrg 			break;
2362479c1a2Smrg 
2372479c1a2Smrg 		pos = strchr(Base64, ch);
23894783bbfSchristos 		if (pos == NULL) 	/*%< A non-base64 character. */
23994783bbfSchristos 			return -1;
2402479c1a2Smrg 
2412479c1a2Smrg 		switch (state) {
2422479c1a2Smrg 		case 0:
2432479c1a2Smrg 			if (target) {
24494783bbfSchristos 				if (tarindex >= targsize)
24594783bbfSchristos 					return -1;
2468a88bf6aSchristos 				target[tarindex] = (u_char)(pos - Base64) << 2;
2472479c1a2Smrg 			}
2482479c1a2Smrg 			state = 1;
2492479c1a2Smrg 			break;
2502479c1a2Smrg 		case 1:
2512479c1a2Smrg 			if (target) {
25294783bbfSchristos 				if (tarindex >= targsize)
25394783bbfSchristos 					return -1;
2542c89343cSchristos 				target[tarindex] |=
25594783bbfSchristos 				    (uint32_t)(pos - Base64) >> 4;
2568a88bf6aSchristos 				nextbyte = (u_char)((pos - Base64) & 0x0f) << 4;
2578a88bf6aSchristos 				if (tarindex + 1 < targsize)
2588a88bf6aSchristos 					target[tarindex + 1] = nextbyte;
2598a88bf6aSchristos 				else if (nextbyte)
26094783bbfSchristos 					return -1;
2612479c1a2Smrg 			}
2622479c1a2Smrg 			tarindex++;
2632479c1a2Smrg 			state = 2;
2642479c1a2Smrg 			break;
2652479c1a2Smrg 		case 2:
2662479c1a2Smrg 			if (target) {
26794783bbfSchristos 				if (tarindex >= targsize)
26894783bbfSchristos 					return -1;
2692c89343cSchristos 				target[tarindex] |=
27094783bbfSchristos 					(uint32_t)(pos - Base64) >> 2;
2718a88bf6aSchristos 				nextbyte = (u_char)((pos - Base64) & 0x03) << 6;
2728a88bf6aSchristos 				if (tarindex + 1 < targsize)
2738a88bf6aSchristos 					target[tarindex + 1] = nextbyte;
2748a88bf6aSchristos 				else if (nextbyte)
27594783bbfSchristos 					return -1;
2762479c1a2Smrg 			}
2772479c1a2Smrg 			tarindex++;
2782479c1a2Smrg 			state = 3;
2792479c1a2Smrg 			break;
2802479c1a2Smrg 		case 3:
2812479c1a2Smrg 			if (target) {
282d73eb73dSchristos 				if ((size_t)tarindex >= targsize)
28394783bbfSchristos 					return -1;
28494783bbfSchristos 				target[tarindex] |= (u_char)(pos - Base64);
2852479c1a2Smrg 			}
2862479c1a2Smrg 			tarindex++;
2872479c1a2Smrg 			state = 0;
2882479c1a2Smrg 			break;
2892479c1a2Smrg 		default:
2902479c1a2Smrg 			abort();
2912479c1a2Smrg 		}
2922479c1a2Smrg 	}
2932479c1a2Smrg 
2942479c1a2Smrg 	/*
2952479c1a2Smrg 	 * We are done decoding Base-64 chars.  Let's see if we ended
2962479c1a2Smrg 	 * on a byte boundary, and/or with erroneous trailing characters.
2972479c1a2Smrg 	 */
2982479c1a2Smrg 
299d73eb73dSchristos 	if (ch == Pad64) {		/*%< We got a pad char. */
300d73eb73dSchristos 		ch = *src++;		/*%< Skip it, get next. */
3012479c1a2Smrg 		switch (state) {
302d73eb73dSchristos 		case 0:		/*%< Invalid = in first position */
303d73eb73dSchristos 		case 1:		/*%< Invalid = in second position */
30494783bbfSchristos 			return -1;
3052479c1a2Smrg 
306d73eb73dSchristos 		case 2:		/*%< Valid, means one byte of info */
3072479c1a2Smrg 			/* Skip any number of spaces. */
3087daefc5aSitohy 			for (; ch != '\0'; ch = (u_char) *src++)
3092479c1a2Smrg 				if (!isspace(ch))
3102479c1a2Smrg 					break;
3112479c1a2Smrg 			/* Make sure there is another trailing = sign. */
3122479c1a2Smrg 			if (ch != Pad64)
31394783bbfSchristos 				return -1;
314d73eb73dSchristos 			ch = *src++;		/*%< Skip the = */
3152479c1a2Smrg 			/* Fall through to "single trailing =" case. */
3162479c1a2Smrg 			/* FALLTHROUGH */
3172479c1a2Smrg 
318d73eb73dSchristos 		case 3:		/*%< Valid, means two bytes of info */
3192479c1a2Smrg 			/*
3202479c1a2Smrg 			 * We know this char is an =.  Is there anything but
3212479c1a2Smrg 			 * whitespace after it?
3222479c1a2Smrg 			 */
3237daefc5aSitohy 			for (; ch != '\0'; ch = (u_char) *src++)
3242479c1a2Smrg 				if (!isspace(ch))
32594783bbfSchristos 					return -1;
3262479c1a2Smrg 
3272479c1a2Smrg 			/*
3282479c1a2Smrg 			 * Now make sure for cases 2 and 3 that the "extra"
3292479c1a2Smrg 			 * bits that slopped past the last full byte were
3302479c1a2Smrg 			 * zeros.  If we don't check them, they become a
3312479c1a2Smrg 			 * subliminal channel.
3322479c1a2Smrg 			 */
3338a88bf6aSchristos 			if (target && tarindex < targsize &&
3348a88bf6aSchristos 			    target[tarindex] != 0)
33594783bbfSchristos 				return -1;
3362479c1a2Smrg 		}
3372479c1a2Smrg 	} else {
3382479c1a2Smrg 		/*
3392479c1a2Smrg 		 * We ended by seeing the end of the string.  Make sure we
3402479c1a2Smrg 		 * have no partial bytes lying around.
3412479c1a2Smrg 		 */
3422479c1a2Smrg 		if (state != 0)
34394783bbfSchristos 			return -1;
3442479c1a2Smrg 	}
3452479c1a2Smrg 
346c5e820caSchristos 	_DIAGASSERT(__type_fit(int, tarindex));
347c5e820caSchristos 	return (int)tarindex;
3482479c1a2Smrg }

/*! \file */