xref: /netbsd-src/external/bsd/openldap/dist/libraries/liblutil/base64.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /* base64.c -- routines to encode/decode base64 data */
2 /* $OpenLDAP: pkg/ldap/libraries/liblutil/base64.c,v 1.15.2.3 2008/02/11 23:26:42 kurt Exp $ */
3 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4  *
5  * Copyright 1998-2008 The OpenLDAP Foundation.
6  * Portions Copyright 1998-2003 Kurt D. Zeilenga.
7  * Portions Copyright 1995 IBM Corporation.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted only as authorized by the OpenLDAP
12  * Public License.
13  *
14  * A copy of this license is available in the file LICENSE in the
15  * top-level directory of the distribution or, alternatively, at
16  * <http://www.OpenLDAP.org/license.html>.
17  */
18 /* Portions Copyright (c) 1996, 1998 by Internet Software Consortium.
19  *
20  * Permission to use, copy, modify, and distribute this software for any
21  * purpose with or without fee is hereby granted, provided that the above
22  * copyright notice and this permission notice appear in all copies.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
25  * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
27  * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
28  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
29  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
30  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
31  * SOFTWARE.
32  */
/* This work is based upon Base64 routines (developed by IBM) found in
 * the Berkeley Internet Name Domain (BIND) software as distributed by
 * ISC.  They were adapted for inclusion in OpenLDAP Software by
 * Kurt D. Zeilenga.
 */
37 
38 #include "portable.h"
39 
40 #include <ac/assert.h>
41 #include <ac/stdlib.h>
42 #include <ac/ctype.h>
43 #include <ac/string.h>
44 
45 /* include socket.h to get sys/types.h and/or winsock2.h */
46 #include <ac/socket.h>
47 
48 #include "lutil.h"
49 
/* The base64 alphabet: the character at index v encodes the 6-bit value v. */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";

/* Filler character used to complete a short final group of output. */
static const char Pad64 = '=';
53 
54 /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
55    The following encoding technique is taken from RFC 1521 by Borenstein
56    and Freed.  It is reproduced here in a slightly edited form for
57    convenience.
58 
59    A 65-character subset of US-ASCII is used, enabling 6 bits to be
60    represented per printable character. (The extra 65th character, "=",
61    is used to signify a special processing function.)
62 
63    The encoding process represents 24-bit groups of input bits as output
64    strings of 4 encoded characters. Proceeding from left to right, a
65    24-bit input group is formed by concatenating 3 8-bit input groups.
66    These 24 bits are then treated as 4 concatenated 6-bit groups, each
67    of which is translated into a single digit in the base64 alphabet.
68 
69    Each 6-bit group is used as an index into an array of 64 printable
70    characters. The character referenced by the index is placed in the
71    output string.
72 
73                          Table 1: The Base64 Alphabet
74 
75       Value Encoding  Value Encoding  Value Encoding  Value Encoding
76           0 A            17 R            34 i            51 z
77           1 B            18 S            35 j            52 0
78           2 C            19 T            36 k            53 1
79           3 D            20 U            37 l            54 2
80           4 E            21 V            38 m            55 3
81           5 F            22 W            39 n            56 4
82           6 G            23 X            40 o            57 5
83           7 H            24 Y            41 p            58 6
84           8 I            25 Z            42 q            59 7
85           9 J            26 a            43 r            60 8
86          10 K            27 b            44 s            61 9
87          11 L            28 c            45 t            62 +
88          12 M            29 d            46 u            63 /
89          13 N            30 e            47 v
90          14 O            31 f            48 w         (pad) =
91          15 P            32 g            49 x
92          16 Q            33 h            50 y
93 
94    Special processing is performed if fewer than 24 bits are available
95    at the end of the data being encoded.  A full encoding quantum is
96    always completed at the end of a quantity.  When fewer than 24 input
97    bits are available in an input group, zero bits are added (on the
98    right) to form an integral number of 6-bit groups.  Padding at the
99    end of the data is performed using the '=' character.
100 
101    Since all base64 input is an integral number of octets, only the
102          -------------------------------------------------
103    following cases can arise:
104 
105        (1) the final quantum of encoding input is an integral
106            multiple of 24 bits; here, the final unit of encoded
107 	   output will be an integral multiple of 4 characters
108 	   with no "=" padding,
109        (2) the final quantum of encoding input is exactly 8 bits;
110            here, the final unit of encoded output will be two
111 	   characters followed by two "=" padding characters, or
112        (3) the final quantum of encoding input is exactly 16 bits;
113            here, the final unit of encoded output will be three
114 	   characters followed by one "=" padding character.
115    */
116 
117 int
118 lutil_b64_ntop(
119 	u_char const *src,
120 	size_t srclength,
121 	char *target,
122 	size_t targsize)
123 {
124 	size_t datalength = 0;
125 	u_char input[3];
126 	u_char output[4];
127 	size_t i;
128 
129 	while (2 < srclength) {
130 		input[0] = *src++;
131 		input[1] = *src++;
132 		input[2] = *src++;
133 		srclength -= 3;
134 
135 		output[0] = input[0] >> 2;
136 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
137 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
138 		output[3] = input[2] & 0x3f;
139 		assert(output[0] < 64);
140 		assert(output[1] < 64);
141 		assert(output[2] < 64);
142 		assert(output[3] < 64);
143 
144 		if (datalength + 4 > targsize)
145 			return (-1);
146 		target[datalength++] = Base64[output[0]];
147 		target[datalength++] = Base64[output[1]];
148 		target[datalength++] = Base64[output[2]];
149 		target[datalength++] = Base64[output[3]];
150 	}
151 
152 	/* Now we worry about padding. */
153 	if (0 != srclength) {
154 		/* Get what's left. */
155 		input[0] = input[1] = input[2] = '\0';
156 		for (i = 0; i < srclength; i++)
157 			input[i] = *src++;
158 
159 		output[0] = input[0] >> 2;
160 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
161 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
162 		assert(output[0] < 64);
163 		assert(output[1] < 64);
164 		assert(output[2] < 64);
165 
166 		if (datalength + 4 > targsize)
167 			return (-1);
168 		target[datalength++] = Base64[output[0]];
169 		target[datalength++] = Base64[output[1]];
170 		if (srclength == 1)
171 			target[datalength++] = Pad64;
172 		else
173 			target[datalength++] = Base64[output[2]];
174 		target[datalength++] = Pad64;
175 	}
176 	if (datalength >= targsize)
177 		return (-1);
178 	target[datalength] = '\0';	/* Returned value doesn't count \0. */
179 	return (datalength);
180 }
181 
182 /* skips all whitespace anywhere.
183    converts characters, four at a time, starting at (or after)
184    src from base - 64 numbers into three 8 bit bytes in the target area.
185    it returns the number of data bytes stored at the target, or -1 on error.
186  */
187 
188 int
189 lutil_b64_pton(
190 	char const *src,
191 	u_char *target,
192 	size_t targsize)
193 {
194 	int tarindex, state, ch;
195 	char *pos;
196 
197 	state = 0;
198 	tarindex = 0;
199 
200 	while ((ch = *src++) != '\0') {
201 		if (isascii(ch) && isspace(ch))	/* Skip whitespace anywhere. */
202 			continue;
203 
204 		if (ch == Pad64)
205 			break;
206 
207 		pos = strchr(Base64, ch);
208 		if (pos == 0) 		/* A non-base64 character. */
209 			return (-1);
210 
211 		switch (state) {
212 		case 0:
213 			if (target) {
214 				if ((size_t)tarindex >= targsize)
215 					return (-1);
216 				target[tarindex] = (pos - Base64) << 2;
217 			}
218 			state = 1;
219 			break;
220 		case 1:
221 			if (target) {
222 				if ((size_t)tarindex + 1 >= targsize)
223 					return (-1);
224 				target[tarindex]   |=  (pos - Base64) >> 4;
225 				target[tarindex+1]  = ((pos - Base64) & 0x0f)
226 							<< 4 ;
227 			}
228 			tarindex++;
229 			state = 2;
230 			break;
231 		case 2:
232 			if (target) {
233 				if ((size_t)tarindex + 1 >= targsize)
234 					return (-1);
235 				target[tarindex]   |=  (pos - Base64) >> 2;
236 				target[tarindex+1]  = ((pos - Base64) & 0x03)
237 							<< 6;
238 			}
239 			tarindex++;
240 			state = 3;
241 			break;
242 		case 3:
243 			if (target) {
244 				if ((size_t)tarindex >= targsize)
245 					return (-1);
246 				target[tarindex] |= (pos - Base64);
247 			}
248 			tarindex++;
249 			state = 0;
250 			break;
251 		default:
252 			abort();
253 		}
254 	}
255 
256 	/*
257 	 * We are done decoding Base-64 chars.  Let's see if we ended
258 	 * on a byte boundary, and/or with erroneous trailing characters.
259 	 */
260 
261 	if (ch == Pad64) {		/* We got a pad char. */
262 		ch = *src++;		/* Skip it, get next. */
263 		switch (state) {
264 		case 0:		/* Invalid = in first position */
265 		case 1:		/* Invalid = in second position */
266 			return (-1);
267 
268 		case 2:		/* Valid, means one byte of info */
269 			/* Skip any number of spaces. */
270 			for ((void)NULL; ch != '\0'; ch = *src++)
271 				if (! (isascii(ch) && isspace(ch)))
272 					break;
273 			/* Make sure there is another trailing = sign. */
274 			if (ch != Pad64)
275 				return (-1);
276 			ch = *src++;		/* Skip the = */
277 			/* Fall through to "single trailing =" case. */
278 			/* FALLTHROUGH */
279 
280 		case 3:		/* Valid, means two bytes of info */
281 			/*
282 			 * We know this char is an =.  Is there anything but
283 			 * whitespace after it?
284 			 */
285 			for ((void)NULL; ch != '\0'; ch = *src++)
286 				if (! (isascii(ch) && isspace(ch)))
287 					return (-1);
288 
289 			/*
290 			 * Now make sure for cases 2 and 3 that the "extra"
291 			 * bits that slopped past the last full byte were
292 			 * zeros.  If we don't check them, they become a
293 			 * subliminal channel.
294 			 */
295 			if (target && target[tarindex] != 0)
296 				return (-1);
297 		}
298 	} else {
299 		/*
300 		 * We ended by seeing the end of the string.  Make sure we
301 		 * have no partial bytes lying around.
302 		 */
303 		if (state != 0)
304 			return (-1);
305 	}
306 
307 	return (tarindex);
308 }
309