xref: /netbsd-src/crypto/external/bsd/netpgp/dist/src/pgp2ssh/b64.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*********************************************************************\
2 
3 MODULE NAME:    b64.c
4 
5 AUTHOR:         Bob Trower 08/04/01
6 
7 PROJECT:        Crypt Data Packaging
8 
9 COPYRIGHT:      Copyright (c) Trantor Standard Systems Inc., 2001
10 
11 NOTE:           This source code may be used as you wish, subject to
12                 the MIT license.  See the LICENCE section below.
13 
14 DESCRIPTION:
15                 This little utility implements the Base64
16                 Content-Transfer-Encoding standard described in
17                 RFC1113 (http://www.faqs.org/rfcs/rfc1113.html).
18 
19                 This is the coding scheme used by MIME to allow
20                 binary data to be transferred by SMTP mail.
21 
22                 Groups of 3 bytes from a binary stream are coded as
23                 groups of 4 bytes in a text stream.
24 
25                 The input stream is 'padded' with zeros to create
26                 an input whose length is an exact multiple of 3.
27 
28                 A special character ('=') is used to denote padding so
29                 that the stream can be decoded back to its exact size.
30 
31                 Encoded output is formatted in lines which should
32                 be a maximum of 72 characters to conform to the
33                 specification.  This program defaults to 72 characters,
34                 but will allow more or less through the use of a
35                 switch.  The program enforces a minimum line size
36                 of 4 characters.
37 
38                 Example encoding:
39 
40                 The stream 'ABCD' is 32 bits long.  It is mapped as
41                 follows:
42 
43                 ABCD
44 
45                  A (65)     B (66)     C (67)     D (68)   (None) (None)
46                 01000001   01000010   01000011   01000100
47 
48                 16 (Q)  20 (U)  9 (J)   3 (D)    17 (R) 0 (A)  NA (=) NA (=)
49                 010000  010100  001001  000011   010001 000000 000000 000000
50 
51 
52                 QUJDRA==
53 
54                 Decoding is the process in reverse.  A 'decode' lookup
55                 table has been created to avoid string scans.
56 
57 DESIGN GOALS:	Specifically:
58 		Code is a stand-alone utility to perform base64
59 		encoding/decoding. It should be genuinely useful
60 		when the need arises and it meets a need that is
61 		likely to occur for some users.
62 		Code acts as sample code to show the author's
63 		design and coding style.
64 
65 		Generally:
66 		This program is designed to survive:
67 		Everything you need is in a single source file.
68 		It compiles cleanly using a vanilla ANSI C compiler.
69 		It does its job correctly with a minimum of fuss.
70 		The code is not overly clever, not overly simplistic
71 		and not overly verbose.
72 		Access is 'cut and paste' from a web page.
73 		Terms of use are reasonable.
74 
75 VALIDATION:     Non-trivial code is never without errors.  This
76                 file likely has some problems, since it has only
77                 been tested by the author.  It is expected with most
78                 source code that there is a period of 'burn-in' when
79                 problems are identified and corrected.  That being
80                 said, it is possible to have 'reasonably correct'
81                 code by following a regime of unit test that covers
82                 the most likely cases and regression testing prior
83                 to release.  This has been done with this code and
84                 it has a good probability of performing as expected.
85 
86                 Unit Test Cases:
87 
88                 case 0:empty file:
89                     CASE0.DAT  ->  ->
90                     (Zero length target file created
91                     on both encode and decode.)
92 
93                 case 1:One input character:
94                     CASE1.DAT A -> QQ== -> A
95 
96                 case 2:Two input characters:
97                     CASE2.DAT AB -> QUI= -> AB
98 
99                 case 3:Three input characters:
100                     CASE3.DAT ABC -> QUJD -> ABC
101 
102                 case 4:Four input characters:
103                     case4.dat ABCD -> QUJDRA== -> ABCD
104 
105                 case 5:All chars from 0 to ff, linesize set to 50:
106 
107                     AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIj
108                     JCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZH
109                     SElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWpr
110                     bG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6P
111                     kJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKz
112                     tLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX
113                     2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7
114                     /P3+/w==
115 
116                 case 6:Mime Block from e-mail:
117                     (Data same as test case 5)
118 
119                 case 7: Large files:
120                     Tested 28 MB file in/out.
121 
122                 case 8: Random Binary Integrity:
123                     This binary program (b64.exe) was encoded to base64,
124                     back to binary and then executed.
125 
126                 case 9 Stress:
127                     All files in a working directory encoded/decoded
128                     and compared with file comparison utility to
129                     ensure that multiple runs do not cause problems
130                     such as exhausting file handles, tmp storage, etc.
131 
132                 -------------
133 
134                 Syntax, operation and failure:
135                     All options/switches tested.  Performs as
136                     expected.
137 
138                 case 10:
139                     No Args -- Shows Usage Screen
140                     Return Code 1 (Invalid Syntax)
141                 case 11:
142                     One Arg (invalid) -- Shows Usage Screen
143                     Return Code 1 (Invalid Syntax)
144                 case 12:
145                     One Arg Help (-?) -- Shows detailed Usage Screen.
146                     Return Code 0 (Success -- help request is valid).
147                 case 13:
148                     One Arg Help (-h) -- Shows detailed Usage Screen.
149                     Return Code 0 (Success -- help request is valid).
150                 case 14:
151                     One Arg (valid) -- Uses stdin/stdout (filter)
152                     Return Code 0 (Success)
153                 case 15:
154                     Two Args (invalid file) -- shows system error.
155                     Return Code 2 (File Error)
156                 case 16:
157                     Encode non-existent file -- shows system error.
158                     Return Code 2 (File Error)
159                 case 17:
160                     Out of disk space -- shows system error.
161                     Return Code 3 (File I/O Error)
162 
163                 -------------
164 
165                 Compile/Regression test:
166                     gcc compiled binary under Cygwin
167                     Microsoft Visual Studio under Windows 2000
168                     Microsoft Version 6.0 C under Windows 2000
169 
170 DEPENDENCIES:   None
171 
172 LICENCE:        Copyright (c) 2001 Bob Trower, Trantor Standard Systems Inc.
173 
174                 Permission is hereby granted, free of charge, to any person
175                 obtaining a copy of this software and associated
176                 documentation files (the "Software"), to deal in the
177                 Software without restriction, including without limitation
178                 the rights to use, copy, modify, merge, publish, distribute,
179                 sublicense, and/or sell copies of the Software, and to
180                 permit persons to whom the Software is furnished to do so,
181                 subject to the following conditions:
182 
183                 The above copyright notice and this permission notice shall
184                 be included in all copies or substantial portions of the
185                 Software.
186 
187                 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
188                 KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
189                 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
190                 PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
191                 OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
192                 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
193                 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
194                 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
195 
196 VERSION HISTORY:
197                 Bob Trower 08/04/01 -- Create Version 0.00.00B
198 
199 \******************************************************************* */
200 
201 #include <inttypes.h>
202 #include <stdio.h>
203 #include <stdlib.h>
204 
205 #include "b64.h"
206 
/*
** Encoding alphabet: cb64[n] is the output character for the 6-bit
** value n (0..63), in the order given by RFC1113.
*/
static const char cb64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"	/*  0..25 */
	"abcdefghijklmnopqrstuvwxyz"	/* 26..51 */
	"0123456789"			/* 52..61 */
	"+/";				/* 62..63 */

/*
** Decoding table (created by author), indexed by (character - 43), so
** it covers the ASCII range '+' (43) to 'z' (122).  A character that
** belongs to the alphabet is stored as (6-bit value + 62); every other
** character in the range, including the '=' padding, is stored as '$'.
*/
static const char cd64[] =
	"|$$$}"				/* '+'  ','  '-'  '.'  '/' */
	"rstuvwxyz{"			/* '0'..'9' */
	"$$$$$$$"			/* ':'..'@' */
	">?@ABCDEFGHIJKLMNOPQRSTUVW"	/* 'A'..'Z' */
	"$$$$$$"			/* '['..'`' */
	"XYZ[\\]^_`abcdefghijklmnopq";	/* 'a'..'z' */
216 
217 /*
218 ** encodeblock
219 **
220 ** encode 3 8-bit binary bytes as 4 '6-bit' characters
221 */
222 static void
223 encodeblock(uint8_t *wordin, uint8_t *wordout, int wordlen)
224 {
225 	wordout[0] = cb64[(unsigned)wordin[0] >> 2];
226 	wordout[1] = cb64[((unsigned)(wordin[0] & 0x03) << 4) | ((unsigned)(wordin[1] & 0xf0) >> 4)];
227 	wordout[2] = (uint8_t)(wordlen > 1) ?
228 		cb64[((unsigned)(wordin[1] & 0x0f) << 2) | ((unsigned)(wordin[2] & 0xc0) >> 6)] : '=';
229 	wordout[3] = (uint8_t)(wordlen > 2) ? cb64[wordin[2] & 0x3f] : '=';
230 }
231 
/*
** b64encode
**
** base64 encode a buffer, adding padding and line breaks.
**
** in       - bytes to encode
** insize   - number of bytes available at `in'
** vp       - output buffer
** outsize  - capacity of `vp' in bytes
** linesize - when > 0, a CR LF pair is written after every
**            linesize / 4 encoded groups and after the final group;
**            when <= 0 no line breaks are written
**
** Returns the number of bytes stored in `vp'.  The output is not NUL
** terminated.  Nothing is ever read beyond in[insize - 1] or written
** beyond vp[outsize - 1]: if the next 4-character group (or its line
** break) does not fit, encoding stops and the shorter count is returned.
*/
int
b64encode(const char *in, const size_t insize, void *vp, size_t outsize, int linesize)
{
	uint8_t		 wordout[4];
	uint8_t		 wordin[3];
	size_t		 consumed;
	size_t		 used;
	size_t		 i;
	char		*out = vp;
	int		 blocksperline;
	int		 blocksout;
	int		 wordlen;

	blocksperline = (linesize > 0) ?
	    (int)((size_t)linesize / sizeof(wordout)) : 0;
	wordlen = 0;
	blocksout = 0;
	consumed = 0;
	used = 0;
	while (consumed < insize) {
		/* a whole group must fit; never write a partial one */
		if (outsize - used < sizeof(wordout)) {
			break;
		}
		/* gather up to 3 bytes, zero filling a short final group */
		for (wordlen = 0, i = 0; i < sizeof(wordin); i++) {
			if (consumed < insize) {
				wordin[i] = (uint8_t)in[consumed++];
				wordlen++;
			} else {
				wordin[i] = 0x0;
			}
		}
		encodeblock(wordin, wordout, wordlen);
		for (i = 0; i < sizeof(wordout); i++) {
			out[used++] = (char)wordout[i];
		}
		blocksout++;
		/* break the line when it is full or the input is exhausted */
		if (linesize > 0 &&
		    (blocksout >= blocksperline || consumed >= insize)) {
			if (outsize - used < 2) {
				break;
			}
			out[used++] = '\r';
			out[used++] = '\n';
			blocksout = 0;
		}
	}
	/*
	 * NOTE(review): when the last group held a full 3 bytes, four '='
	 * characters are appended.  This is not RFC1113 padding; it is kept
	 * because existing callers may depend on the exact output -- confirm
	 * before removing.  The "outsize >= 4" guard stops the unsigned
	 * subtraction from wrapping for tiny buffers.
	 */
	if (wordlen == 3 && outsize >= 4 && used < outsize - 4) {
		for (i = 0; i < 4; i++) {
			out[used++] = '=';
		}
	}
	return (int)used;
}
284 
/*
** decodeblock
**
** Pack four 6-bit values into three 8-bit bytes.
**
** wordin  - four input values, one per element
** wordout - receives the three reassembled bytes
*/
static void
decodeblock(uint8_t wordin[4], uint8_t wordout[3])
{
	unsigned	bits;

	/* six bits of value 0, then the top two of value 1 */
	bits = ((unsigned)wordin[0] << 2) | ((unsigned)wordin[1] >> 4);
	wordout[0] = (uint8_t)bits;

	/* low four bits of value 1, then the top four of value 2 */
	bits = ((unsigned)wordin[1] << 4) | ((unsigned)wordin[2] >> 2);
	wordout[1] = (uint8_t)bits;

	/* low two bits of value 2, then value 3 as-is */
	bits = (((unsigned)wordin[2] << 6) & 0xc0u) | (unsigned)wordin[3];
	wordout[2] = (uint8_t)bits;
}
297 
298 /*
299 ** decode
300 **
301 ** decode a base64 encoded stream discarding padding, line breaks and noise
302 */
303 int
304 b64decode(const char *in, const size_t insize, void *vp, size_t outsize)
305 {
306 	const char	*inp;
307 	unsigned	 wordlen;
308 	unsigned	 i;
309 	uint8_t    	 wordout[3];
310 	uint8_t    	 wordin[4];
311 	uint8_t    	 v;
312 	char		*out = vp;
313 	char		*outp;
314 
315 	for (inp = in, outp = out ; (size_t)(inp - in) < insize && (size_t)(outp - out) < outsize ; ) {
316 		for (wordlen = 0, i = 0 ; i < sizeof(wordin) && (size_t)(inp - in) < insize ; i++) {
317 			/* get a single character */
318 			for (v = 0; (size_t)(inp - in) < insize && v == 0 ; ) {
319 				if (*inp == '\r' && *(inp + 1) == '\n') {
320 					inp += 2;
321 				} else {
322 					v = (uint8_t) *inp++;
323 					v = (uint8_t) ((v < 43 || v > 122) ? 0 : cd64[v - 43]);
324 					if (v) {
325 						v = (uint8_t) ((v == '$') ? 0 : v - 61);
326 					}
327 				}
328 			}
329 			/* perhaps 0x0 pad */
330 			if ((size_t)(inp - in) < insize) {
331 				wordlen += 1;
332 				if (v) {
333 					wordin[i] = (uint8_t) (v - 1);
334 				}
335 			} else {
336 				wordin[i] = 0x0;
337 			}
338 		}
339 		if (wordlen > 0) {
340 			decodeblock(wordin, wordout);
341 			for (i = 0; i < wordlen - 1 ; i++) {
342 				*outp++ = wordout[i];
343 			}
344 		}
345 	}
346 	return (int)(outp - out);
347 }
348