xref: /netbsd-src/lib/libc/net/base64.c (revision d0fed6c87ddc40a8bffa6f99e7433ddfc864dd83)
1 /*	$NetBSD: base64.c,v 1.2 1997/04/13 10:30:31 mrg Exp $	*/
2 
3 /*
4  * Copyright (c) 1996 by Internet Software Consortium.
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
11  * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
12  * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
13  * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
16  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
17  * SOFTWARE.
18  */
19 
20 /*
21  * Portions Copyright (c) 1995 by International Business Machines, Inc.
22  *
23  * International Business Machines, Inc. (hereinafter called IBM) grants
24  * permission under its copyrights to use, copy, modify, and distribute this
25  * Software with or without fee, provided that the above copyright notice and
26  * all paragraphs of this notice appear in all copies, and that the name of IBM
27  * not be used in connection with the marketing of any product incorporating
28  * the Software or modifications thereof, without specific, written prior
29  * permission.
30  *
31  * To the extent it has a right to do so, IBM grants an immunity from suit
32  * under its patents, if any, for the use, sale or manufacture of products to
33  * the extent that such products are used for performing Domain Name System
34  * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
35  * granted for any product per se or for any other function of any product.
36  *
37  * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
38  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39  * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
40  * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
41  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
42  * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
43  */
44 
45 #if defined(LIBC_SCCS) && !defined(lint)
46 static char rcsid[] = "$NetBSD: base64.c,v 1.2 1997/04/13 10:30:31 mrg Exp $";
47 #endif /* LIBC_SCCS and not lint */
48 
49 
50 #include <sys/types.h>
51 #include <sys/param.h>
52 #include <sys/socket.h>
53 #include <netinet/in.h>
54 #include <arpa/inet.h>
55 #include <arpa/nameser.h>
56 
57 #include <ctype.h>
58 #include <resolv.h>
59 #include <stdio.h>
60 
61 #if defined(BSD) && (BSD >= 199103) && defined(AF_INET6)
62 # include <stdlib.h>
63 # include <string.h>
64 #else
65 # include "../conf/portability.h"
66 #endif
67 
/*
 * Abort the process when an internal invariant does not hold.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: "Assert(x);" can then be used as the body of an
 * if/else without the following "else" attaching to the macro's own "if".
 */
#define Assert(Cond) do { if (!(Cond)) abort(); } while (0)

/* The 64 digits of the base64 alphabet; a 6-bit value is the index. */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Character used to pad a short final group out to four characters. */
static const char Pad64 = '=';
73 
74 /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
75    The following encoding technique is taken from RFC 1521 by Borenstein
76    and Freed.  It is reproduced here in a slightly edited form for
77    convenience.
78 
79    A 65-character subset of US-ASCII is used, enabling 6 bits to be
80    represented per printable character. (The extra 65th character, "=",
81    is used to signify a special processing function.)
82 
83    The encoding process represents 24-bit groups of input bits as output
84    strings of 4 encoded characters. Proceeding from left to right, a
85    24-bit input group is formed by concatenating 3 8-bit input groups.
86    These 24 bits are then treated as 4 concatenated 6-bit groups, each
87    of which is translated into a single digit in the base64 alphabet.
88 
89    Each 6-bit group is used as an index into an array of 64 printable
90    characters. The character referenced by the index is placed in the
91    output string.
92 
93                          Table 1: The Base64 Alphabet
94 
95       Value Encoding  Value Encoding  Value Encoding  Value Encoding
96           0 A            17 R            34 i            51 z
97           1 B            18 S            35 j            52 0
98           2 C            19 T            36 k            53 1
99           3 D            20 U            37 l            54 2
100           4 E            21 V            38 m            55 3
101           5 F            22 W            39 n            56 4
102           6 G            23 X            40 o            57 5
103           7 H            24 Y            41 p            58 6
104           8 I            25 Z            42 q            59 7
105           9 J            26 a            43 r            60 8
106          10 K            27 b            44 s            61 9
107          11 L            28 c            45 t            62 +
108          12 M            29 d            46 u            63 /
109          13 N            30 e            47 v
110          14 O            31 f            48 w         (pad) =
111          15 P            32 g            49 x
112          16 Q            33 h            50 y
113 
114    Special processing is performed if fewer than 24 bits are available
115    at the end of the data being encoded.  A full encoding quantum is
116    always completed at the end of a quantity.  When fewer than 24 input
117    bits are available in an input group, zero bits are added (on the
118    right) to form an integral number of 6-bit groups.  Padding at the
119    end of the data is performed using the '=' character.
120 
121    Since all base64 input is an integral number of octets, only the
122          -------------------------------------------------
123    following cases can arise:
124 
125        (1) the final quantum of encoding input is an integral
126            multiple of 24 bits; here, the final unit of encoded
127 	   output will be an integral multiple of 4 characters
128 	   with no "=" padding,
129        (2) the final quantum of encoding input is exactly 8 bits;
130            here, the final unit of encoded output will be two
131 	   characters followed by two "=" padding characters, or
132        (3) the final quantum of encoding input is exactly 16 bits;
133            here, the final unit of encoded output will be three
134 	   characters followed by one "=" padding character.
135    */
136 
137 int
138 b64_ntop(src, srclength, target, targsize)
139 	u_char const *src;
140 	size_t srclength;
141 	char *target;
142 	size_t targsize;
143 {
144 	size_t datalength = 0;
145 	u_char input[3];
146 	u_char output[4];
147 	int i;
148 
149 	while (2 < srclength) {
150 		input[0] = *src++;
151 		input[1] = *src++;
152 		input[2] = *src++;
153 		srclength -= 3;
154 
155 		output[0] = input[0] >> 2;
156 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
157 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
158 		output[3] = input[2] & 0x3f;
159 		Assert(output[0] < 64);
160 		Assert(output[1] < 64);
161 		Assert(output[2] < 64);
162 		Assert(output[3] < 64);
163 
164 		if (datalength + 4 > targsize)
165 			return (-1);
166 		target[datalength++] = Base64[output[0]];
167 		target[datalength++] = Base64[output[1]];
168 		target[datalength++] = Base64[output[2]];
169 		target[datalength++] = Base64[output[3]];
170 	}
171 
172 	/* Now we worry about padding. */
173 	if (0 != srclength) {
174 		/* Get what's left. */
175 		input[0] = input[1] = input[2] = '\0';
176 		for (i = 0; i < srclength; i++)
177 			input[i] = *src++;
178 
179 		output[0] = input[0] >> 2;
180 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
181 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
182 		Assert(output[0] < 64);
183 		Assert(output[1] < 64);
184 		Assert(output[2] < 64);
185 
186 		if (datalength + 4 > targsize)
187 			return (-1);
188 		target[datalength++] = Base64[output[0]];
189 		target[datalength++] = Base64[output[1]];
190 		if (srclength == 1)
191 			target[datalength++] = Pad64;
192 		else
193 			target[datalength++] = Base64[output[2]];
194 		target[datalength++] = Pad64;
195 	}
196 	if (datalength >= targsize)
197 		return (-1);
198 	target[datalength] = '\0';	/* Returned value doesn't count \0. */
199 	return (datalength);
200 }
201 
202 /* skips all whitespace anywhere.
203    converts characters, four at a time, starting at (or after)
204    src from base - 64 numbers into three 8 bit bytes in the target area.
205    it returns the number of data bytes stored at the target, or -1 on error.
206  */
207 
208 int
209 b64_pton(src, target, targsize)
210 	char const *src;
211 	u_char *target;
212 	size_t targsize;
213 {
214 	int tarindex, state, ch;
215 	char *pos;
216 
217 	state = 0;
218 	tarindex = 0;
219 
220 	while ((ch = *src++) != '\0') {
221 		if (isspace(ch))	/* Skip whitespace anywhere. */
222 			continue;
223 
224 		if (ch == Pad64)
225 			break;
226 
227 		pos = strchr(Base64, ch);
228 		if (pos == 0) 		/* A non-base64 character. */
229 			return (-1);
230 
231 		switch (state) {
232 		case 0:
233 			if (target) {
234 				if (tarindex >= targsize)
235 					return (-1);
236 				target[tarindex] = (pos - Base64) << 2;
237 			}
238 			state = 1;
239 			break;
240 		case 1:
241 			if (target) {
242 				if (tarindex + 1 >= targsize)
243 					return (-1);
244 				target[tarindex]   |=  (pos - Base64) >> 4;
245 				target[tarindex+1]  = ((pos - Base64) & 0x0f)
246 							<< 4 ;
247 			}
248 			tarindex++;
249 			state = 2;
250 			break;
251 		case 2:
252 			if (target) {
253 				if (tarindex + 1 >= targsize)
254 					return (-1);
255 				target[tarindex]   |=  (pos - Base64) >> 2;
256 				target[tarindex+1]  = ((pos - Base64) & 0x03)
257 							<< 6;
258 			}
259 			tarindex++;
260 			state = 3;
261 			break;
262 		case 3:
263 			if (target) {
264 				if (tarindex >= targsize)
265 					return (-1);
266 				target[tarindex] |= (pos - Base64);
267 			}
268 			tarindex++;
269 			state = 0;
270 			break;
271 		default:
272 			abort();
273 		}
274 	}
275 
276 	/*
277 	 * We are done decoding Base-64 chars.  Let's see if we ended
278 	 * on a byte boundary, and/or with erroneous trailing characters.
279 	 */
280 
281 	if (ch == Pad64) {		/* We got a pad char. */
282 		ch = *src++;		/* Skip it, get next. */
283 		switch (state) {
284 		case 0:		/* Invalid = in first position */
285 		case 1:		/* Invalid = in second position */
286 			return (-1);
287 
288 		case 2:		/* Valid, means one byte of info */
289 			/* Skip any number of spaces. */
290 			for (NULL; ch != '\0'; ch = *src++)
291 				if (!isspace(ch))
292 					break;
293 			/* Make sure there is another trailing = sign. */
294 			if (ch != Pad64)
295 				return (-1);
296 			ch = *src++;		/* Skip the = */
297 			/* Fall through to "single trailing =" case. */
298 			/* FALLTHROUGH */
299 
300 		case 3:		/* Valid, means two bytes of info */
301 			/*
302 			 * We know this char is an =.  Is there anything but
303 			 * whitespace after it?
304 			 */
305 			for (NULL; ch != '\0'; ch = *src++)
306 				if (!isspace(ch))
307 					return (-1);
308 
309 			/*
310 			 * Now make sure for cases 2 and 3 that the "extra"
311 			 * bits that slopped past the last full byte were
312 			 * zeros.  If we don't check them, they become a
313 			 * subliminal channel.
314 			 */
315 			if (target && target[tarindex] != 0)
316 				return (-1);
317 		}
318 	} else {
319 		/*
320 		 * We ended by seeing the end of the string.  Make sure we
321 		 * have no partial bytes lying around.
322 		 */
323 		if (state != 0)
324 			return (-1);
325 	}
326 
327 	return (tarindex);
328 }
329