xref: /openbsd-src/lib/libc/net/base64.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: base64.c,v 1.12 2021/10/22 10:22:15 tb Exp $	*/
2 
3 /*
4  * Copyright (c) 1996 by Internet Software Consortium.
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
11  * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
12  * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
13  * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
16  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
17  * SOFTWARE.
18  */
19 
20 /*
21  * Portions Copyright (c) 1995 by International Business Machines, Inc.
22  *
23  * International Business Machines, Inc. (hereinafter called IBM) grants
24  * permission under its copyrights to use, copy, modify, and distribute this
25  * Software with or without fee, provided that the above copyright notice and
26  * all paragraphs of this notice appear in all copies, and that the name of IBM
27  * not be used in connection with the marketing of any product incorporating
28  * the Software or modifications thereof, without specific, written prior
29  * permission.
30  *
31  * To the extent it has a right to do so, IBM grants an immunity from suit
32  * under its patents, if any, for the use, sale or manufacture of products to
33  * the extent that such products are used for performing Domain Name System
34  * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
35  * granted for any product per se or for any other function of any product.
36  *
37  * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
38  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39  * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
40  * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
41  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
42  * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
43  */
44 
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include <ctype.h>
#include <limits.h>
#include <resolv.h>

#include <stdlib.h>
#include <string.h>
55 
/*
 * The base64 alphabet: the character at index v is the digit that
 * encodes the 6-bit value v.
 */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Filler character used to complete a short final group of digits. */
static const char Pad64 = '=';
59 
60 /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
61    The following encoding technique is taken from RFC 1521 by Borenstein
62    and Freed.  It is reproduced here in a slightly edited form for
63    convenience.
64 
65    A 65-character subset of US-ASCII is used, enabling 6 bits to be
66    represented per printable character. (The extra 65th character, "=",
67    is used to signify a special processing function.)
68 
69    The encoding process represents 24-bit groups of input bits as output
70    strings of 4 encoded characters. Proceeding from left to right, a
71    24-bit input group is formed by concatenating 3 8-bit input groups.
72    These 24 bits are then treated as 4 concatenated 6-bit groups, each
73    of which is translated into a single digit in the base64 alphabet.
74 
75    Each 6-bit group is used as an index into an array of 64 printable
76    characters. The character referenced by the index is placed in the
77    output string.
78 
79                          Table 1: The Base64 Alphabet
80 
81       Value Encoding  Value Encoding  Value Encoding  Value Encoding
82           0 A            17 R            34 i            51 z
83           1 B            18 S            35 j            52 0
84           2 C            19 T            36 k            53 1
85           3 D            20 U            37 l            54 2
86           4 E            21 V            38 m            55 3
87           5 F            22 W            39 n            56 4
88           6 G            23 X            40 o            57 5
89           7 H            24 Y            41 p            58 6
90           8 I            25 Z            42 q            59 7
91           9 J            26 a            43 r            60 8
92          10 K            27 b            44 s            61 9
93          11 L            28 c            45 t            62 +
94          12 M            29 d            46 u            63 /
95          13 N            30 e            47 v
96          14 O            31 f            48 w         (pad) =
97          15 P            32 g            49 x
98          16 Q            33 h            50 y
99 
100    Special processing is performed if fewer than 24 bits are available
101    at the end of the data being encoded.  A full encoding quantum is
102    always completed at the end of a quantity.  When fewer than 24 input
103    bits are available in an input group, zero bits are added (on the
104    right) to form an integral number of 6-bit groups.  Padding at the
105    end of the data is performed using the '=' character.
106 
107    Since all base64 input is an integral number of octets, only the
108          -------------------------------------------------
109    following cases can arise:
110 
111        (1) the final quantum of encoding input is an integral
112            multiple of 24 bits; here, the final unit of encoded
113 	   output will be an integral multiple of 4 characters
114 	   with no "=" padding,
115        (2) the final quantum of encoding input is exactly 8 bits;
116            here, the final unit of encoded output will be two
117 	   characters followed by two "=" padding characters, or
118        (3) the final quantum of encoding input is exactly 16 bits;
119            here, the final unit of encoded output will be three
120 	   characters followed by one "=" padding character.
121    */
122 
123 int
124 b64_ntop(src, srclength, target, targsize)
125 	unsigned char const *src;
126 	size_t srclength;
127 	char *target;
128 	size_t targsize;
129 {
130 	size_t datalength = 0;
131 	unsigned char input[3];
132 	unsigned char output[4];
133 	int i;
134 
135 	while (2 < srclength) {
136 		input[0] = *src++;
137 		input[1] = *src++;
138 		input[2] = *src++;
139 		srclength -= 3;
140 
141 		output[0] = input[0] >> 2;
142 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
143 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
144 		output[3] = input[2] & 0x3f;
145 
146 		if (datalength + 4 > targsize)
147 			return (-1);
148 		target[datalength++] = Base64[output[0]];
149 		target[datalength++] = Base64[output[1]];
150 		target[datalength++] = Base64[output[2]];
151 		target[datalength++] = Base64[output[3]];
152 	}
153 
154 	/* Now we worry about padding. */
155 	if (0 != srclength) {
156 		/* Get what's left. */
157 		input[0] = input[1] = input[2] = '\0';
158 		for (i = 0; i < srclength; i++)
159 			input[i] = *src++;
160 
161 		output[0] = input[0] >> 2;
162 		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
163 		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
164 
165 		if (datalength + 4 > targsize)
166 			return (-1);
167 		target[datalength++] = Base64[output[0]];
168 		target[datalength++] = Base64[output[1]];
169 		if (srclength == 1)
170 			target[datalength++] = Pad64;
171 		else
172 			target[datalength++] = Base64[output[2]];
173 		target[datalength++] = Pad64;
174 	}
175 	if (datalength >= targsize)
176 		return (-1);
177 	target[datalength] = '\0';	/* Returned value doesn't count \0. */
178 	return (datalength);
179 }
180 
181 /* skips all whitespace anywhere.
182    converts characters, four at a time, starting at (or after)
183    src from base - 64 numbers into three 8 bit bytes in the target area.
184    it returns the number of data bytes stored at the target, or -1 on error.
185  */
186 
187 int
188 b64_pton(src, target, targsize)
189 	char const *src;
190 	unsigned char *target;
191 	size_t targsize;
192 {
193 	int tarindex, state, ch;
194 	unsigned char nextbyte;
195 	char *pos;
196 
197 	state = 0;
198 	tarindex = 0;
199 
200 	while ((ch = (unsigned char)*src++) != '\0') {
201 		if (isspace(ch))	/* Skip whitespace anywhere. */
202 			continue;
203 
204 		if (ch == Pad64)
205 			break;
206 
207 		pos = strchr(Base64, ch);
208 		if (pos == 0)		/* A non-base64 character. */
209 			return (-1);
210 
211 		switch (state) {
212 		case 0:
213 			if (target) {
214 				if (tarindex >= targsize)
215 					return (-1);
216 				target[tarindex] = (pos - Base64) << 2;
217 			}
218 			state = 1;
219 			break;
220 		case 1:
221 			if (target) {
222 				if (tarindex >= targsize)
223 					return (-1);
224 				target[tarindex]   |=  (pos - Base64) >> 4;
225 				nextbyte = ((pos - Base64) & 0x0f) << 4;
226 				if (tarindex + 1 < targsize)
227 					target[tarindex+1] = nextbyte;
228 				else if (nextbyte)
229 					return (-1);
230 			}
231 			tarindex++;
232 			state = 2;
233 			break;
234 		case 2:
235 			if (target) {
236 				if (tarindex >= targsize)
237 					return (-1);
238 				target[tarindex]   |=  (pos - Base64) >> 2;
239 				nextbyte = ((pos - Base64) & 0x03) << 6;
240 				if (tarindex + 1 < targsize)
241 					target[tarindex+1] = nextbyte;
242 				else if (nextbyte)
243 					return (-1);
244 			}
245 			tarindex++;
246 			state = 3;
247 			break;
248 		case 3:
249 			if (target) {
250 				if (tarindex >= targsize)
251 					return (-1);
252 				target[tarindex] |= (pos - Base64);
253 			}
254 			tarindex++;
255 			state = 0;
256 			break;
257 		}
258 	}
259 
260 	/*
261 	 * We are done decoding Base-64 chars.  Let's see if we ended
262 	 * on a byte boundary, and/or with erroneous trailing characters.
263 	 */
264 
265 	if (ch == Pad64) {			/* We got a pad char. */
266 		ch = (unsigned char)*src++;	/* Skip it, get next. */
267 		switch (state) {
268 		case 0:		/* Invalid = in first position */
269 		case 1:		/* Invalid = in second position */
270 			return (-1);
271 
272 		case 2:		/* Valid, means one byte of info */
273 			/* Skip any number of spaces. */
274 			for (; ch != '\0'; ch = (unsigned char)*src++)
275 				if (!isspace(ch))
276 					break;
277 			/* Make sure there is another trailing = sign. */
278 			if (ch != Pad64)
279 				return (-1);
280 			ch = (unsigned char)*src++;		/* Skip the = */
281 			/* Fall through to "single trailing =" case. */
282 			/* FALLTHROUGH */
283 
284 		case 3:		/* Valid, means two bytes of info */
285 			/*
286 			 * We know this char is an =.  Is there anything but
287 			 * whitespace after it?
288 			 */
289 			for (; ch != '\0'; ch = (unsigned char)*src++)
290 				if (!isspace(ch))
291 					return (-1);
292 
293 			/*
294 			 * Now make sure for cases 2 and 3 that the "extra"
295 			 * bits that slopped past the last full byte were
296 			 * zeros.  If we don't check them, they become a
297 			 * subliminal channel.
298 			 */
299 			if (target && tarindex < targsize &&
300 			    target[tarindex] != 0)
301 				return (-1);
302 		}
303 	} else {
304 		/*
305 		 * We ended by seeing the end of the string.  Make sure we
306 		 * have no partial bytes lying around.
307 		 */
308 		if (state != 0)
309 			return (-1);
310 	}
311 
312 	return (tarindex);
313 }
314