xref: /netbsd-src/lib/libcrypt/crypt.c (revision da9817918ec7e88db2912a2882967c7570a83f47)
1 /*	$NetBSD: crypt.c,v 1.28 2009/05/01 00:28:17 perry Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Tom Truscott.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #if !defined(lint)
37 #if 0
38 static char sccsid[] = "@(#)crypt.c	8.1.1.1 (Berkeley) 8/18/93";
39 #else
40 __RCSID("$NetBSD: crypt.c,v 1.28 2009/05/01 00:28:17 perry Exp $");
41 #endif
42 #endif /* not lint */
43 
44 #include <limits.h>
45 #include <pwd.h>
46 #include <stdlib.h>
47 #include <unistd.h>
48 #if defined(DEBUG) || defined(MAIN) || defined(UNIT_TEST)
49 #include <stdio.h>
50 #endif
51 
52 #include "crypt.h"
53 
54 /*
55  * UNIX password, and DES, encryption.
56  * By Tom Truscott, trt@rti.rti.org,
57  * from algorithms by Robert W. Baldwin and James Gillogly.
58  *
59  * References:
60  * "Mathematical Cryptology for Computer Scientists and Mathematicians,"
61  * by Wayne Patterson, 1987, ISBN 0-8476-7438-X.
62  *
63  * "Password Security: A Case History," R. Morris and Ken Thompson,
64  * Communications of the ACM, vol. 22, pp. 594-597, Nov. 1979.
65  *
66  * "DES will be Totally Insecure within Ten Years," M.E. Hellman,
67  * IEEE Spectrum, vol. 16, pp. 32-39, July 1979.
68  */
69 
70 /* =====  Configuration ==================== */
71 
72 /*
73  * define "MUST_ALIGN" if your compiler cannot load/store
74  * long integers at arbitrary (e.g. odd) memory locations.
75  * (Either that or never pass unaligned addresses to des_cipher!)
76  */
77 #if !defined(__vax__) && !defined(__i386__)
78 #define	MUST_ALIGN
79 #endif
80 
81 #ifdef CHAR_BITS
82 #if CHAR_BITS != 8
83 	#error C_block structure assumes 8 bit characters
84 #endif
85 #endif
86 
87 /*
88  * define "B64" to be the declaration for a 64 bit integer.
89  * XXX this feature is currently unused, see "endian" comment below.
90  */
91 #if defined(cray)
92 #define	B64	long
93 #endif
94 #if defined(convex)
95 #define	B64	long long
96 #endif
97 
98 /*
99  * define "LARGEDATA" to get faster permutations, by using about 72 kilobytes
100  * of lookup tables.  This speeds up des_setkey() and des_cipher(), but has
101  * little effect on crypt().
102  */
103 #if defined(notdef)
104 #define	LARGEDATA
105 #endif
106 
107 /* compile with "-DSTATIC=void" when profiling */
108 #ifndef STATIC
109 #define	STATIC	static void
110 #endif
111 
112 /* ==================================== */
113 
114 /*
115  * Cipher-block representation (Bob Baldwin):
116  *
117  * DES operates on groups of 64 bits, numbered 1..64 (sigh).  One
118  * representation is to store one bit per byte in an array of bytes.  Bit N of
119  * the NBS spec is stored as the LSB of the Nth byte (index N-1) in the array.
120  * Another representation stores the 64 bits in 8 bytes, with bits 1..8 in the
121  * first byte, 9..16 in the second, and so on.  The DES spec apparently has
122  * bit 1 in the MSB of the first byte, but that is particularly noxious so we
123  * bit-reverse each byte so that bit 1 is the LSB of the first byte, bit 8 is
124  * the MSB of the first byte.  Specifically, the 64-bit input data and key are
125  * converted to LSB format, and the output 64-bit block is converted back into
126  * MSB format.
127  *
128  * DES operates internally on groups of 32 bits which are expanded to 48 bits
129  * by permutation E and shrunk back to 32 bits by the S boxes.  To speed up
130  * the computation, the expansion is applied only once, the expanded
131  * representation is maintained during the encryption, and a compression
132  * permutation is applied only at the end.  To speed up the S-box lookups,
133  * the 48 bits are maintained as eight 6 bit groups, one per byte, which
134  * directly feed the eight S-boxes.  Within each byte, the 6 bits are the
135  * most significant ones.  The low two bits of each byte are zero.  (Thus,
136  * bit 1 of the 48 bit E expansion is stored as the "4"-valued bit of the
137  * first byte in the eight byte representation, bit 2 of the 48 bit value is
138  * the "8"-valued bit, and so on.)  In fact, a combined "SPE"-box lookup is
139  * used, in which the output is the 64 bit result of an S-box lookup which
140  * has been permuted by P and expanded by E, and is ready for use in the next
141  * iteration.  Two 32-bit wide tables, SPE[0] and SPE[1], are used for this
142  * lookup.  Since each byte in the 48 bit path is a multiple of four, indexed
143  * lookup of SPE[0] and SPE[1] is simple and fast.  The key schedule and
144  * "salt" are also converted to this 8*(6+2) format.  The SPE table size is
145  * 8*64*8 = 4K bytes.
146  *
147  * To speed up bit-parallel operations (such as XOR), the 8 byte
148  * representation is "union"ed with 32 bit values "i0" and "i1", and, on
149  * machines which support it, a 64 bit value "b64".  This data structure,
150  * "C_block", has two problems.  First, alignment restrictions must be
151  * honored.  Second, the byte-order (e.g. little-endian or big-endian) of
152  * the architecture becomes visible.
153  *
154  * The byte-order problem is unfortunate, since on the one hand it is good
155  * to have a machine-independent C_block representation (bits 1..8 in the
156  * first byte, etc.), and on the other hand it is good for the LSB of the
157  * first byte to be the LSB of i0.  We cannot have both these things, so we
158  * currently use the "little-endian" representation and avoid any multi-byte
159  * operations that depend on byte order.  This largely precludes use of the
160  * 64-bit datatype since the relative order of i0 and i1 are unknown.  It
161  * also inhibits grouping the SPE table to look up 12 bits at a time.  (The
162  * 12 bits can be stored in a 16-bit field with 3 low-order zeroes and 1
163  * high-order zero, providing fast indexing into a 64-bit wide SPE.)  On the
164  * other hand, 64-bit datatypes are currently rare, and a 12-bit SPE lookup
165  * requires a 128 kilobyte table, so perhaps this is not a big loss.
166  *
167  * Permutation representation (Jim Gillogly):
168  *
169  * A transformation is defined by its effect on each of the 8 bytes of the
170  * 64-bit input.  For each byte we give a 64-bit output that has the bits in
171  * the input distributed appropriately.  The transformation is then the OR
172  * of the 8 sets of 64-bits.  This uses 8*256*8 = 16K bytes of storage for
173  * each transformation.  Unless LARGEDATA is defined, however, a more compact
174  * table is used which looks up 16 4-bit "chunks" rather than 8 8-bit chunks.
175  * The smaller table uses 16*16*8 = 2K bytes for each transformation.  This
176  * is slower but tolerable, particularly for password encryption in which
177  * the SPE transformation is iterated many times.  The small tables total 9K
178  * bytes, the large tables total 72K bytes.
179  *
180  * The transformations used are:
181  * IE3264: MSB->LSB conversion, initial permutation, and expansion.
182  *	This is done by collecting the 32 even-numbered bits and applying
183  *	a 32->64 bit transformation, and then collecting the 32 odd-numbered
184  *	bits and applying the same transformation.  Since there are only
185  *	32 input bits, the IE3264 transformation table is half the size of
186  *	the usual table.
187  * CF6464: Compression, final permutation, and LSB->MSB conversion.
188  *	This is done by two trivial 48->32 bit compressions to obtain
189  *	a 64-bit block (the bit numbering is given in the "CIFP" table)
190  *	followed by a 64->64 bit "cleanup" transformation.  (It would
191  *	be possible to group the bits in the 64-bit block so that 2
192  *	identical 32->32 bit transformations could be used instead,
193  *	saving a factor of 4 in space and possibly 2 in time, but
194  *	byte-ordering and other complications rear their ugly head.
195  *	Similar opportunities/problems arise in the key schedule
196  *	transforms.)
197  * PC1ROT: MSB->LSB, PC1 permutation, rotate, and PC2 permutation.
198  *	This admittedly baroque 64->64 bit transformation is used to
199  *	produce the first code (in 8*(6+2) format) of the key schedule.
200  * PC2ROT[0]: Inverse PC2 permutation, rotate, and PC2 permutation.
201  *	It would be possible to define 15 more transformations, each
202  *	with a different rotation, to generate the entire key schedule.
203  *	To save space, however, we instead permute each code into the
204  *	next by using a transformation that "undoes" the PC2 permutation,
205  *	rotates the code, and then applies PC2.  Unfortunately, PC2
206  *	transforms 56 bits into 48 bits, dropping 8 bits, so PC2 is not
207  *	invertible.  We get around that problem by using a modified PC2
208  *	which retains the 8 otherwise-lost bits in the unused low-order
209  *	bits of each byte.  The low-order bits are cleared when the
210  *	codes are stored into the key schedule.
211  * PC2ROT[1]: Same as PC2ROT[0], but with two rotations.
212  *	This is faster than applying PC2ROT[0] twice,
213  *
214  * The Bell Labs "salt" (Bob Baldwin):
215  *
216  * The salting is a simple permutation applied to the 48-bit result of E.
217  * Specifically, if bit i (1 <= i <= 24) of the salt is set then bits i and
218  * i+24 of the result are swapped.  The salt is thus a 24 bit number, with
219  * 16777216 possible values.  (The original salt was 12 bits and could not
220  * swap bits 13..24 with 36..48.)
221  *
222  * It is possible, but ugly, to warp the SPE table to account for the salt
223  * permutation.  Fortunately, the conditional bit swapping requires only
224  * about four machine instructions and can be done on-the-fly with about an
225  * 8% performance penalty.
226  */
227 
228 typedef union {
229 	unsigned char b[8];
230 	struct {
231 		int32_t	i0;
232 		int32_t	i1;
233 	} b32;
234 #if defined(B64)
235 	B64	b64;
236 #endif
237 } C_block;
238 
239 /*
240  * Convert twenty-four-bit long in host-order
241  * to six bits (and 2 low-order zeroes) per char little-endian format.
242  */
243 #define	TO_SIX_BIT(rslt, src) {				\
244 		C_block cvt;				\
245 		cvt.b[0] = src; src >>= 6;		\
246 		cvt.b[1] = src; src >>= 6;		\
247 		cvt.b[2] = src; src >>= 6;		\
248 		cvt.b[3] = src;				\
249 		rslt = (cvt.b32.i0 & 0x3f3f3f3fL) << 2;	\
250 	}
251 
252 /*
253  * These macros may someday permit efficient use of 64-bit integers.
254  */
255 #define	ZERO(d,d0,d1)			d0 = 0, d1 = 0
256 #define	LOAD(d,d0,d1,bl)		d0 = (bl).b32.i0, d1 = (bl).b32.i1
257 #define	LOADREG(d,d0,d1,s,s0,s1)	d0 = s0, d1 = s1
258 #define	OR(d,d0,d1,bl)			d0 |= (bl).b32.i0, d1 |= (bl).b32.i1
259 #define	STORE(s,s0,s1,bl)		(bl).b32.i0 = s0, (bl).b32.i1 = s1
260 #define	DCL_BLOCK(d,d0,d1)		int32_t d0, d1
261 
262 #if defined(LARGEDATA)
263 	/* Waste memory like crazy.  Also, do permutations in line */
264 #define	LGCHUNKBITS	3
265 #define	CHUNKBITS	(1<<LGCHUNKBITS)
266 #define	PERM6464(d,d0,d1,cpp,p)				\
267 	LOAD(d,d0,d1,(p)[(0<<CHUNKBITS)+(cpp)[0]]);		\
268 	OR (d,d0,d1,(p)[(1<<CHUNKBITS)+(cpp)[1]]);		\
269 	OR (d,d0,d1,(p)[(2<<CHUNKBITS)+(cpp)[2]]);		\
270 	OR (d,d0,d1,(p)[(3<<CHUNKBITS)+(cpp)[3]]);		\
271 	OR (d,d0,d1,(p)[(4<<CHUNKBITS)+(cpp)[4]]);		\
272 	OR (d,d0,d1,(p)[(5<<CHUNKBITS)+(cpp)[5]]);		\
273 	OR (d,d0,d1,(p)[(6<<CHUNKBITS)+(cpp)[6]]);		\
274 	OR (d,d0,d1,(p)[(7<<CHUNKBITS)+(cpp)[7]]);
275 #define	PERM3264(d,d0,d1,cpp,p)				\
276 	LOAD(d,d0,d1,(p)[(0<<CHUNKBITS)+(cpp)[0]]);		\
277 	OR (d,d0,d1,(p)[(1<<CHUNKBITS)+(cpp)[1]]);		\
278 	OR (d,d0,d1,(p)[(2<<CHUNKBITS)+(cpp)[2]]);		\
279 	OR (d,d0,d1,(p)[(3<<CHUNKBITS)+(cpp)[3]]);
280 #else
281 	/* "small data" */
282 #define	LGCHUNKBITS	2
283 #define	CHUNKBITS	(1<<LGCHUNKBITS)
284 #define	PERM6464(d,d0,d1,cpp,p)				\
285 	{ C_block tblk; permute(cpp,&tblk,p,8); LOAD (d,d0,d1,tblk); }
286 #define	PERM3264(d,d0,d1,cpp,p)				\
287 	{ C_block tblk; permute(cpp,&tblk,p,4); LOAD (d,d0,d1,tblk); }
288 #endif /* LARGEDATA */
289 
290 STATIC	init_des(void);
291 STATIC	init_perm(C_block [64/CHUNKBITS][1<<CHUNKBITS],
292 		       const unsigned char [64], int, int);
293 #ifndef LARGEDATA
294 STATIC	permute(const unsigned char *, C_block *, C_block *, int);
295 #endif
296 #ifdef DEBUG
297 STATIC	prtab(const char *, unsigned char *, int);
298 #endif
299 
300 
301 #ifndef LARGEDATA
302 STATIC
303 permute(const unsigned char *cp, C_block *out, C_block *p, int chars_in)
304 {
305 	DCL_BLOCK(D,D0,D1);
306 	C_block *tp;
307 	int t;
308 
309 	ZERO(D,D0,D1);
310 	do {
311 		t = *cp++;
312 		tp = &p[t&0xf]; OR(D,D0,D1,*tp); p += (1<<CHUNKBITS);
313 		tp = &p[t>>4];  OR(D,D0,D1,*tp); p += (1<<CHUNKBITS);
314 	} while (--chars_in > 0);
315 	STORE(D,D0,D1,*out);
316 }
317 #endif /* LARGEDATA */
318 
319 
320 /* =====  (mostly) Standard DES Tables ==================== */
321 
322 static const unsigned char IP[] = {	/* initial permutation */
323 	58, 50, 42, 34, 26, 18, 10,  2,
324 	60, 52, 44, 36, 28, 20, 12,  4,
325 	62, 54, 46, 38, 30, 22, 14,  6,
326 	64, 56, 48, 40, 32, 24, 16,  8,
327 	57, 49, 41, 33, 25, 17,  9,  1,
328 	59, 51, 43, 35, 27, 19, 11,  3,
329 	61, 53, 45, 37, 29, 21, 13,  5,
330 	63, 55, 47, 39, 31, 23, 15,  7,
331 };
332 
333 /* The final permutation is the inverse of IP - no table is necessary */
334 
335 static const unsigned char ExpandTr[] = {	/* expansion operation */
336 	32,  1,  2,  3,  4,  5,
337 	 4,  5,  6,  7,  8,  9,
338 	 8,  9, 10, 11, 12, 13,
339 	12, 13, 14, 15, 16, 17,
340 	16, 17, 18, 19, 20, 21,
341 	20, 21, 22, 23, 24, 25,
342 	24, 25, 26, 27, 28, 29,
343 	28, 29, 30, 31, 32,  1,
344 };
345 
346 static const unsigned char PC1[] = {	/* permuted choice table 1 */
347 	57, 49, 41, 33, 25, 17,  9,
348 	 1, 58, 50, 42, 34, 26, 18,
349 	10,  2, 59, 51, 43, 35, 27,
350 	19, 11,  3, 60, 52, 44, 36,
351 
352 	63, 55, 47, 39, 31, 23, 15,
353 	 7, 62, 54, 46, 38, 30, 22,
354 	14,  6, 61, 53, 45, 37, 29,
355 	21, 13,  5, 28, 20, 12,  4,
356 };
357 
358 static const unsigned char Rotates[] = {/* PC1 rotation schedule */
359 	1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1,
360 };
361 
362 /* note: each "row" of PC2 is left-padded with bits that make it invertible */
363 static const unsigned char PC2[] = {	/* permuted choice table 2 */
364 	 9, 18,    14, 17, 11, 24,  1,  5,
365 	22, 25,     3, 28, 15,  6, 21, 10,
366 	35, 38,    23, 19, 12,  4, 26,  8,
367 	43, 54,    16,  7, 27, 20, 13,  2,
368 
369 	 0,  0,    41, 52, 31, 37, 47, 55,
370 	 0,  0,    30, 40, 51, 45, 33, 48,
371 	 0,  0,    44, 49, 39, 56, 34, 53,
372 	 0,  0,    46, 42, 50, 36, 29, 32,
373 };
374 
375 static const unsigned char S[8][64] = {	/* 48->32 bit substitution tables */
376 					/* S[1]			*/
377 	{ 14,  4, 13,  1,  2, 15, 11,  8,  3, 10,  6, 12,  5,  9,  0,  7,
378 	   0, 15,  7,  4, 14,  2, 13,  1, 10,  6, 12, 11,  9,  5,  3,  8,
379 	   4,  1, 14,  8, 13,  6,  2, 11, 15, 12,  9,  7,  3, 10,  5,  0,
380 	  15, 12,  8,  2,  4,  9,  1,  7,  5, 11,  3, 14, 10,  0,  6, 13 },
381 					/* S[2]			*/
382 	{ 15,  1,  8, 14,  6, 11,  3,  4,  9,  7,  2, 13, 12,  0,  5, 10,
383 	   3, 13,  4,  7, 15,  2,  8, 14, 12,  0,  1, 10,  6,  9, 11,  5,
384 	   0, 14,  7, 11, 10,  4, 13,  1,  5,  8, 12,  6,  9,  3,  2, 15,
385 	  13,  8, 10,  1,  3, 15,  4,  2, 11,  6,  7, 12,  0,  5, 14,  9 },
386 					/* S[3]			*/
387 	{ 10,  0,  9, 14,  6,  3, 15,  5,  1, 13, 12,  7, 11,  4,  2,  8,
388 	  13,  7,  0,  9,  3,  4,  6, 10,  2,  8,  5, 14, 12, 11, 15,  1,
389 	  13,  6,  4,  9,  8, 15,  3,  0, 11,  1,  2, 12,  5, 10, 14,  7,
390 	   1, 10, 13,  0,  6,  9,  8,  7,  4, 15, 14,  3, 11,  5,  2, 12 },
391 					/* S[4]			*/
392 	{  7, 13, 14,  3,  0,  6,  9, 10,  1,  2,  8,  5, 11, 12,  4, 15,
393 	  13,  8, 11,  5,  6, 15,  0,  3,  4,  7,  2, 12,  1, 10, 14,  9,
394 	  10,  6,  9,  0, 12, 11,  7, 13, 15,  1,  3, 14,  5,  2,  8,  4,
395 	   3, 15,  0,  6, 10,  1, 13,  8,  9,  4,  5, 11, 12,  7,  2, 14 },
396 					/* S[5]			*/
397 	{  2, 12,  4,  1,  7, 10, 11,  6,  8,  5,  3, 15, 13,  0, 14,  9,
398 	  14, 11,  2, 12,  4,  7, 13,  1,  5,  0, 15, 10,  3,  9,  8,  6,
399 	   4,  2,  1, 11, 10, 13,  7,  8, 15,  9, 12,  5,  6,  3,  0, 14,
400 	  11,  8, 12,  7,  1, 14,  2, 13,  6, 15,  0,  9, 10,  4,  5,  3 },
401 					/* S[6]			*/
402 	{ 12,  1, 10, 15,  9,  2,  6,  8,  0, 13,  3,  4, 14,  7,  5, 11,
403 	  10, 15,  4,  2,  7, 12,  9,  5,  6,  1, 13, 14,  0, 11,  3,  8,
404 	   9, 14, 15,  5,  2,  8, 12,  3,  7,  0,  4, 10,  1, 13, 11,  6,
405 	   4,  3,  2, 12,  9,  5, 15, 10, 11, 14,  1,  7,  6,  0,  8, 13 },
406 					/* S[7]			*/
407 	{  4, 11,  2, 14, 15,  0,  8, 13,  3, 12,  9,  7,  5, 10,  6,  1,
408 	  13,  0, 11,  7,  4,  9,  1, 10, 14,  3,  5, 12,  2, 15,  8,  6,
409 	   1,  4, 11, 13, 12,  3,  7, 14, 10, 15,  6,  8,  0,  5,  9,  2,
410 	   6, 11, 13,  8,  1,  4, 10,  7,  9,  5,  0, 15, 14,  2,  3, 12 },
411 					/* S[8]			*/
412 	{ 13,  2,  8,  4,  6, 15, 11,  1, 10,  9,  3, 14,  5,  0, 12,  7,
413 	   1, 15, 13,  8, 10,  3,  7,  4, 12,  5,  6, 11,  0, 14,  9,  2,
414 	   7, 11,  4,  1,  9, 12, 14,  2,  0,  6, 10, 13, 15,  3,  5,  8,
415 	   2,  1, 14,  7,  4, 10,  8, 13, 15, 12,  9,  0,  3,  5,  6, 11 }
416 };
417 
418 static const unsigned char P32Tr[] = {	/* 32-bit permutation function */
419 	16,  7, 20, 21,
420 	29, 12, 28, 17,
421 	 1, 15, 23, 26,
422 	 5, 18, 31, 10,
423 	 2,  8, 24, 14,
424 	32, 27,  3,  9,
425 	19, 13, 30,  6,
426 	22, 11,  4, 25,
427 };
428 
429 static const unsigned char CIFP[] = {	/* compressed/interleaved permutation */
430 	 1,  2,  3,  4,   17, 18, 19, 20,
431 	 5,  6,  7,  8,   21, 22, 23, 24,
432 	 9, 10, 11, 12,   25, 26, 27, 28,
433 	13, 14, 15, 16,   29, 30, 31, 32,
434 
435 	33, 34, 35, 36,   49, 50, 51, 52,
436 	37, 38, 39, 40,   53, 54, 55, 56,
437 	41, 42, 43, 44,   57, 58, 59, 60,
438 	45, 46, 47, 48,   61, 62, 63, 64,
439 };
440 
441 static const unsigned char itoa64[] =		/* 0..63 => ascii-64 */
442 	"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
443 
444 
445 /* =====  Tables that are initialized at run time  ==================== */
446 
447 
448 static unsigned char a64toi[128];	/* ascii-64 => 0..63 */
449 
450 /* Initial key schedule permutation */
451 static C_block	PC1ROT[64/CHUNKBITS][1<<CHUNKBITS];
452 
453 /* Subsequent key schedule rotation permutations */
454 static C_block	PC2ROT[2][64/CHUNKBITS][1<<CHUNKBITS];
455 
456 /* Initial permutation/expansion table */
457 static C_block	IE3264[32/CHUNKBITS][1<<CHUNKBITS];
458 
459 /* Table that combines the S, P, and E operations.  */
460 static int32_t SPE[2][8][64];
461 
462 /* compressed/interleaved => final permutation table */
463 static C_block	CF6464[64/CHUNKBITS][1<<CHUNKBITS];
464 
465 
466 /* ==================================== */
467 
468 
469 static C_block	constdatablock;			/* encryption constant */
470 static char	cryptresult[1+4+4+11+1];	/* encrypted result */
471 
472 
473 /*
474  * Return a pointer to static data consisting of the "setting"
475  * followed by an encryption produced by the "key" and "setting".
476  */
477 char *
478 crypt(const char *key, const char *setting)
479 {
480 	char *encp;
481 	int32_t i;
482 	int t;
483 	int32_t salt;
484 	int num_iter, salt_size;
485 	C_block keyblock, rsltblock;
486 
487 	/* Non-DES encryption schemes hook in here. */
488 	if (setting[0] == _PASSWORD_NONDES) {
489 		switch (setting[1]) {
490 		case '2':
491 			return (__bcrypt(key, setting));
492 		case 's':
493 			return (__crypt_sha1(key, setting));
494 		case '1':
495 		default:
496 			return (__md5crypt(key, setting));
497 		}
498 	}
499 
500 	for (i = 0; i < 8; i++) {
501 		if ((t = 2*(unsigned char)(*key)) != 0)
502 			key++;
503 		keyblock.b[i] = t;
504 	}
505 	if (des_setkey((char *)keyblock.b))	/* also initializes "a64toi" */
506 		return (NULL);
507 
508 	encp = &cryptresult[0];
509 	switch (*setting) {
510 	case _PASSWORD_EFMT1:
511 		/*
512 		 * Involve the rest of the password 8 characters at a time.
513 		 */
514 		while (*key) {
515 			if (des_cipher((char *)(void *)&keyblock,
516 			    (char *)(void *)&keyblock, 0L, 1))
517 				return (NULL);
518 			for (i = 0; i < 8; i++) {
519 				if ((t = 2*(unsigned char)(*key)) != 0)
520 					key++;
521 				keyblock.b[i] ^= t;
522 			}
523 			if (des_setkey((char *)keyblock.b))
524 				return (NULL);
525 		}
526 
527 		*encp++ = *setting++;
528 
529 		/* get iteration count */
530 		num_iter = 0;
531 		for (i = 4; --i >= 0; ) {
532 			if ((t = (unsigned char)setting[i]) == '\0')
533 				t = '.';
534 			encp[i] = t;
535 			num_iter = (num_iter<<6) | a64toi[t];
536 		}
537 		setting += 4;
538 		encp += 4;
539 		salt_size = 4;
540 		break;
541 	default:
542 		num_iter = 25;
543 		salt_size = 2;
544 	}
545 
546 	salt = 0;
547 	for (i = salt_size; --i >= 0; ) {
548 		if ((t = (unsigned char)setting[i]) == '\0')
549 			t = '.';
550 		encp[i] = t;
551 		salt = (salt<<6) | a64toi[t];
552 	}
553 	encp += salt_size;
554 	if (des_cipher((char *)(void *)&constdatablock,
555 	    (char *)(void *)&rsltblock, salt, num_iter))
556 		return (NULL);
557 
558 	/*
559 	 * Encode the 64 cipher bits as 11 ascii characters.
560 	 */
561 	i = ((int32_t)((rsltblock.b[0]<<8) | rsltblock.b[1])<<8) |
562 	    rsltblock.b[2];
563 	encp[3] = itoa64[i&0x3f];	i >>= 6;
564 	encp[2] = itoa64[i&0x3f];	i >>= 6;
565 	encp[1] = itoa64[i&0x3f];	i >>= 6;
566 	encp[0] = itoa64[i];		encp += 4;
567 	i = ((int32_t)((rsltblock.b[3]<<8) | rsltblock.b[4])<<8) |
568 	    rsltblock.b[5];
569 	encp[3] = itoa64[i&0x3f];	i >>= 6;
570 	encp[2] = itoa64[i&0x3f];	i >>= 6;
571 	encp[1] = itoa64[i&0x3f];	i >>= 6;
572 	encp[0] = itoa64[i];		encp += 4;
573 	i = ((int32_t)((rsltblock.b[6])<<8) | rsltblock.b[7])<<2;
574 	encp[2] = itoa64[i&0x3f];	i >>= 6;
575 	encp[1] = itoa64[i&0x3f];	i >>= 6;
576 	encp[0] = itoa64[i];
577 
578 	encp[3] = 0;
579 
580 	return (cryptresult);
581 }
582 
583 
584 /*
585  * The Key Schedule, filled in by des_setkey() or setkey().
586  */
587 #define	KS_SIZE	16
588 static C_block	KS[KS_SIZE];
589 
590 /*
591  * Set up the key schedule from the key.
592  */
593 int
594 des_setkey(const char *key)
595 {
596 	DCL_BLOCK(K, K0, K1);
597 	C_block *help, *ptabp;
598 	int i;
599 	static int des_ready = 0;
600 
601 	if (!des_ready) {
602 		init_des();
603 		des_ready = 1;
604 	}
605 
606 	PERM6464(K,K0,K1,(const unsigned char *)key,(C_block *)PC1ROT);
607 	help = &KS[0];
608 	STORE(K&~0x03030303L, K0&~0x03030303L, K1, *help);
609 	for (i = 1; i < 16; i++) {
610 		help++;
611 		STORE(K,K0,K1,*help);
612 		ptabp = (C_block *)PC2ROT[Rotates[i]-1];
613 		PERM6464(K,K0,K1,(const unsigned char *)help,ptabp);
614 		STORE(K&~0x03030303L, K0&~0x03030303L, K1, *help);
615 	}
616 	return (0);
617 }
618 
619 /*
620  * Encrypt (or decrypt if num_iter < 0) the 8 chars at "in" with abs(num_iter)
621  * iterations of DES, using the given 24-bit salt and the pre-computed key
622  * schedule, and store the resulting 8 chars at "out" (in == out is permitted).
623  *
624  * NOTE: the performance of this routine is critically dependent on your
625  * compiler and machine architecture.
626  */
627 int
628 des_cipher(const char *in, char *out, long salt, int num_iter)
629 {
630 	/* variables that we want in registers, most important first */
631 #if defined(pdp11)
632 	int j;
633 #endif
634 	int32_t L0, L1, R0, R1, k;
635 	C_block *kp;
636 	int ks_inc, loop_count;
637 	C_block B;
638 
639 	L0 = salt;
640 	TO_SIX_BIT(salt, L0);	/* convert to 4*(6+2) format */
641 
642 #if defined(__vax__) || defined(pdp11)
643 	salt = ~salt;	/* "x &~ y" is faster than "x & y". */
644 #define	SALT (~salt)
645 #else
646 #define	SALT salt
647 #endif
648 
649 #if defined(MUST_ALIGN)
650 	B.b[0] = in[0]; B.b[1] = in[1]; B.b[2] = in[2]; B.b[3] = in[3];
651 	B.b[4] = in[4]; B.b[5] = in[5]; B.b[6] = in[6]; B.b[7] = in[7];
652 	LOAD(L,L0,L1,B);
653 #else
654 	LOAD(L,L0,L1,*(const C_block *)in);
655 #endif
656 	LOADREG(R,R0,R1,L,L0,L1);
657 	L0 &= 0x55555555L;
658 	L1 &= 0x55555555L;
659 	L0 = (L0 << 1) | L1;	/* L0 is the even-numbered input bits */
660 	R0 &= 0xaaaaaaaaL;
661 	R1 = (R1 >> 1) & 0x55555555L;
662 	L1 = R0 | R1;		/* L1 is the odd-numbered input bits */
663 	STORE(L,L0,L1,B);
664 	PERM3264(L,L0,L1,B.b,  (C_block *)IE3264);	/* even bits */
665 	PERM3264(R,R0,R1,B.b+4,(C_block *)IE3264);	/* odd bits */
666 
667 	if (num_iter >= 0)
668 	{		/* encryption */
669 		kp = &KS[0];
670 		ks_inc  = sizeof(*kp);
671 	}
672 	else
673 	{		/* decryption */
674 		num_iter = -num_iter;
675 		kp = &KS[KS_SIZE-1];
676 		ks_inc  = -(long)sizeof(*kp);
677 	}
678 
679 	while (--num_iter >= 0) {
680 		loop_count = 8;
681 		do {
682 
683 #define	SPTAB(t, i) \
684 	    (*(int32_t *)((unsigned char *)t + i*(sizeof(int32_t)/4)))
685 #if defined(gould)
686 			/* use this if B.b[i] is evaluated just once ... */
687 #define	DOXOR(x,y,i)	x^=SPTAB(SPE[0][i],B.b[i]); y^=SPTAB(SPE[1][i],B.b[i]);
688 #else
689 #if defined(pdp11)
690 			/* use this if your "long" int indexing is slow */
691 #define	DOXOR(x,y,i)	j=B.b[i]; x^=SPTAB(SPE[0][i],j); y^=SPTAB(SPE[1][i],j);
692 #else
693 			/* use this if "k" is allocated to a register ... */
694 #define	DOXOR(x,y,i)	k=B.b[i]; x^=SPTAB(SPE[0][i],k); y^=SPTAB(SPE[1][i],k);
695 #endif
696 #endif
697 
698 #define	CRUNCH(p0, p1, q0, q1)	\
699 			k = (q0 ^ q1) & SALT;	\
700 			B.b32.i0 = k ^ q0 ^ kp->b32.i0;		\
701 			B.b32.i1 = k ^ q1 ^ kp->b32.i1;		\
702 			kp = (C_block *)((char *)kp+ks_inc);	\
703 							\
704 			DOXOR(p0, p1, 0);		\
705 			DOXOR(p0, p1, 1);		\
706 			DOXOR(p0, p1, 2);		\
707 			DOXOR(p0, p1, 3);		\
708 			DOXOR(p0, p1, 4);		\
709 			DOXOR(p0, p1, 5);		\
710 			DOXOR(p0, p1, 6);		\
711 			DOXOR(p0, p1, 7);
712 
713 			CRUNCH(L0, L1, R0, R1);
714 			CRUNCH(R0, R1, L0, L1);
715 		} while (--loop_count != 0);
716 		kp = (C_block *)((char *)kp-(ks_inc*KS_SIZE));
717 
718 
719 		/* swap L and R */
720 		L0 ^= R0;  L1 ^= R1;
721 		R0 ^= L0;  R1 ^= L1;
722 		L0 ^= R0;  L1 ^= R1;
723 	}
724 
725 	/* store the encrypted (or decrypted) result */
726 	L0 = ((L0 >> 3) & 0x0f0f0f0fL) | ((L1 << 1) & 0xf0f0f0f0L);
727 	L1 = ((R0 >> 3) & 0x0f0f0f0fL) | ((R1 << 1) & 0xf0f0f0f0L);
728 	STORE(L,L0,L1,B);
729 	PERM6464(L,L0,L1,B.b, (C_block *)CF6464);
730 #if defined(MUST_ALIGN)
731 	STORE(L,L0,L1,B);
732 	out[0] = B.b[0]; out[1] = B.b[1]; out[2] = B.b[2]; out[3] = B.b[3];
733 	out[4] = B.b[4]; out[5] = B.b[5]; out[6] = B.b[6]; out[7] = B.b[7];
734 #else
735 	STORE(L,L0,L1,*(C_block *)out);
736 #endif
737 	return (0);
738 }
739 
740 
741 /*
742  * Initialize various tables.  This need only be done once.  It could even be
743  * done at compile time, if the compiler were capable of that sort of thing.
744  */
745 STATIC
746 init_des(void)
747 {
748 	int i, j;
749 	int32_t k;
750 	int tableno;
751 	static unsigned char perm[64], tmp32[32];	/* "static" for speed */
752 
753 	/*
754 	 * table that converts chars "./0-9A-Za-z"to integers 0-63.
755 	 */
756 	for (i = 0; i < 64; i++)
757 		a64toi[itoa64[i]] = i;
758 
759 	/*
760 	 * PC1ROT - bit reverse, then PC1, then Rotate, then PC2.
761 	 */
762 	for (i = 0; i < 64; i++)
763 		perm[i] = 0;
764 	for (i = 0; i < 64; i++) {
765 		if ((k = PC2[i]) == 0)
766 			continue;
767 		k += Rotates[0]-1;
768 		if ((k%28) < Rotates[0]) k -= 28;
769 		k = PC1[k];
770 		if (k > 0) {
771 			k--;
772 			k = (k|07) - (k&07);
773 			k++;
774 		}
775 		perm[i] = k;
776 	}
777 #ifdef DEBUG
778 	prtab("pc1tab", perm, 8);
779 #endif
780 	init_perm(PC1ROT, perm, 8, 8);
781 
782 	/*
783 	 * PC2ROT - PC2 inverse, then Rotate (once or twice), then PC2.
784 	 */
785 	for (j = 0; j < 2; j++) {
786 		unsigned char pc2inv[64];
787 		for (i = 0; i < 64; i++)
788 			perm[i] = pc2inv[i] = 0;
789 		for (i = 0; i < 64; i++) {
790 			if ((k = PC2[i]) == 0)
791 				continue;
792 			pc2inv[k-1] = i+1;
793 		}
794 		for (i = 0; i < 64; i++) {
795 			if ((k = PC2[i]) == 0)
796 				continue;
797 			k += j;
798 			if ((k%28) <= j) k -= 28;
799 			perm[i] = pc2inv[k];
800 		}
801 #ifdef DEBUG
802 		prtab("pc2tab", perm, 8);
803 #endif
804 		init_perm(PC2ROT[j], perm, 8, 8);
805 	}
806 
807 	/*
808 	 * Bit reverse, then initial permutation, then expansion.
809 	 */
810 	for (i = 0; i < 8; i++) {
811 		for (j = 0; j < 8; j++) {
812 			k = (j < 2)? 0: IP[ExpandTr[i*6+j-2]-1];
813 			if (k > 32)
814 				k -= 32;
815 			else if (k > 0)
816 				k--;
817 			if (k > 0) {
818 				k--;
819 				k = (k|07) - (k&07);
820 				k++;
821 			}
822 			perm[i*8+j] = k;
823 		}
824 	}
825 #ifdef DEBUG
826 	prtab("ietab", perm, 8);
827 #endif
828 	init_perm(IE3264, perm, 4, 8);
829 
830 	/*
831 	 * Compression, then final permutation, then bit reverse.
832 	 */
833 	for (i = 0; i < 64; i++) {
834 		k = IP[CIFP[i]-1];
835 		if (k > 0) {
836 			k--;
837 			k = (k|07) - (k&07);
838 			k++;
839 		}
840 		perm[k-1] = i+1;
841 	}
842 #ifdef DEBUG
843 	prtab("cftab", perm, 8);
844 #endif
845 	init_perm(CF6464, perm, 8, 8);
846 
847 	/*
848 	 * SPE table
849 	 */
850 	for (i = 0; i < 48; i++)
851 		perm[i] = P32Tr[ExpandTr[i]-1];
852 	for (tableno = 0; tableno < 8; tableno++) {
853 		for (j = 0; j < 64; j++)  {
854 			k = (((j >> 0) &01) << 5)|
855 			    (((j >> 1) &01) << 3)|
856 			    (((j >> 2) &01) << 2)|
857 			    (((j >> 3) &01) << 1)|
858 			    (((j >> 4) &01) << 0)|
859 			    (((j >> 5) &01) << 4);
860 			k = S[tableno][k];
861 			k = (((k >> 3)&01) << 0)|
862 			    (((k >> 2)&01) << 1)|
863 			    (((k >> 1)&01) << 2)|
864 			    (((k >> 0)&01) << 3);
865 			for (i = 0; i < 32; i++)
866 				tmp32[i] = 0;
867 			for (i = 0; i < 4; i++)
868 				tmp32[4 * tableno + i] = (k >> i) & 01;
869 			k = 0;
870 			for (i = 24; --i >= 0; )
871 				k = (k<<1) | tmp32[perm[i]-1];
872 			TO_SIX_BIT(SPE[0][tableno][j], k);
873 			k = 0;
874 			for (i = 24; --i >= 0; )
875 				k = (k<<1) | tmp32[perm[i+24]-1];
876 			TO_SIX_BIT(SPE[1][tableno][j], k);
877 		}
878 	}
879 }
880 
881 /*
882  * Initialize "perm" to represent transformation "p", which rearranges
883  * (perhaps with expansion and/or contraction) one packed array of bits
884  * (of size "chars_in" characters) into another array (of size "chars_out"
885  * characters).
886  *
887  * "perm" must be all-zeroes on entry to this routine.
888  */
889 STATIC
890 init_perm(C_block perm[64/CHUNKBITS][1<<CHUNKBITS], const unsigned char p[64],
891     int chars_in, int chars_out)
892 {
893 	int i, j, k, l;
894 
895 	for (k = 0; k < chars_out*8; k++) {	/* each output bit position */
896 		l = p[k] - 1;		/* where this bit comes from */
897 		if (l < 0)
898 			continue;	/* output bit is always 0 */
899 		i = l>>LGCHUNKBITS;	/* which chunk this bit comes from */
900 		l = 1<<(l&(CHUNKBITS-1));	/* mask for this bit */
901 		for (j = 0; j < (1<<CHUNKBITS); j++) {	/* each chunk value */
902 			if ((j & l) != 0)
903 				perm[i][j].b[k>>3] |= 1<<(k&07);
904 		}
905 	}
906 }
907 
908 /*
909  * "setkey" routine (for backwards compatibility)
910  */
911 int
912 setkey(const char *key)
913 {
914 	int i, j, k;
915 	C_block keyblock;
916 
917 	for (i = 0; i < 8; i++) {
918 		k = 0;
919 		for (j = 0; j < 8; j++) {
920 			k <<= 1;
921 			k |= (unsigned char)*key++;
922 		}
923 		keyblock.b[i] = k;
924 	}
925 	return (des_setkey((char *)keyblock.b));
926 }
927 
928 /*
929  * "encrypt" routine (for backwards compatibility)
930  */
931 int
932 encrypt(char *block, int flag)
933 {
934 	int i, j, k;
935 	C_block cblock;
936 
937 	for (i = 0; i < 8; i++) {
938 		k = 0;
939 		for (j = 0; j < 8; j++) {
940 			k <<= 1;
941 			k |= (unsigned char)*block++;
942 		}
943 		cblock.b[i] = k;
944 	}
945 	if (des_cipher((char *)&cblock, (char *)&cblock, 0L, (flag ? -1: 1)))
946 		return (1);
947 	for (i = 7; i >= 0; i--) {
948 		k = cblock.b[i];
949 		for (j = 7; j >= 0; j--) {
950 			*--block = k&01;
951 			k >>= 1;
952 		}
953 	}
954 	return (0);
955 }
956 
957 #ifdef DEBUG
958 STATIC
959 prtab(const char *s, unsigned char *t, int num_rows)
960 {
961 	int i, j;
962 
963 	(void)printf("%s:\n", s);
964 	for (i = 0; i < num_rows; i++) {
965 		for (j = 0; j < 8; j++) {
966 			 (void)printf("%3d", t[i*8+j]);
967 		}
968 		(void)printf("\n");
969 	}
970 	(void)printf("\n");
971 }
972 #endif
973 
974 #if defined(MAIN) || defined(UNIT_TEST)
975 #include <err.h>
976 
977 int
978 main(int argc, char *argv[])
979 {
980     if (argc < 2)
981 	errx(1, "Usage: %s password [salt]\n", argv[0]);
982 
983     printf("%s\n", crypt(argv[1], (argc > 2) ? argv[2] : argv[1]));
984     exit(0);
985 }
986 #endif
987