/*
 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
 * Written by Michal Ludvig <michal@logix.cz>
 *            http://www.logix.cz/michal
 *
 * Big thanks to Andy Polyakov for his help with optimization,
 * assembler fixes, the port to MS Windows and a lot of other
 * valuable work on this engine!
 */

/* ====================================================================
 * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    licensing@OpenSSL.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com).  This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 *
 */


#include <stdio.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
#include <openssl/evp.h>
#ifndef OPENSSL_NO_AES
#include <openssl/aes.h>
#endif
#include <openssl/rand.h>
#include <openssl/err.h>

#ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW_PADLOCK

/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
#if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
#  ifndef OPENSSL_NO_DYNAMIC_ENGINE
#    define DYNAMIC_ENGINE
#  endif
#elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
#  ifdef ENGINE_DYNAMIC_SUPPORT
#    define DYNAMIC_ENGINE
#  endif
#else
#  error "Only OpenSSL >= 0.9.7 is supported"
#endif

/* VIA PadLock AES is available *ONLY* on some x86 CPUs.
   Not only does it not exist elsewhere, it cannot even
   be compiled on other platforms!

   In addition, because of the heavy use of inline assembler,
   the choice of compiler is limited to GCC and Microsoft C. */
#undef COMPILE_HW_PADLOCK
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
     (defined(_MSC_VER) && defined(_M_IX86))
#  define COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock (void);
# endif
#endif

#ifdef OPENSSL_NO_DYNAMIC_ENGINE

void ENGINE_load_padlock (void)
{
/* On non-x86 CPUs it just returns. */
#ifdef COMPILE_HW_PADLOCK
	ENGINE *toadd = ENGINE_padlock ();
	if (!toadd) return;
	ENGINE_add (toadd);
	ENGINE_free (toadd);
	ERR_clear_error ();
#endif
}

#endif
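
/*
 * Usage sketch (not part of this engine): an application linked with a
 * static OpenSSL would typically engage PadLock along the following
 * lines. The identifiers used are standard ENGINE API calls; error
 * handling is omitted for brevity.
 *
 *	ENGINE *e;
 *	ENGINE_load_padlock();
 *	e = ENGINE_by_id("padlock");
 *	if (e && ENGINE_init(e)) {
 *		ENGINE_set_default(e, ENGINE_METHOD_ALL);
 *		ENGINE_finish(e);
 *	}
 *	if (e) ENGINE_free(e);
 */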

#ifdef COMPILE_HW_PADLOCK
/* We do these includes here to avoid header problems on platforms that
   do not have the VIA padlock anyway... */
#include <stdlib.h>
#ifdef _WIN32
# include <malloc.h>
# ifndef alloca
#  define alloca _alloca
# endif
#elif defined(__GNUC__)
# ifndef alloca
#  define alloca(s) __builtin_alloca(s)
# endif
#endif

/* Functions for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
#endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features */
static int padlock_use_ace = 0;	/* Advanced Cryptography Engine */
static int padlock_use_rng = 0;	/* Random Number Generator */
#ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
#endif

/* ===== Engine "management" functions ===== */

/* Prepare the ENGINE structure for registration */
static int
padlock_bind_helper(ENGINE *e)
{
	/* Check available features */
	padlock_available();

#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
	padlock_use_rng=0;
#endif

	/* Generate a nice engine name with available features */
	BIO_snprintf(padlock_name, sizeof(padlock_name),
		"VIA PadLock (%s, %s)",
		 padlock_use_rng ? "RNG" : "no-RNG",
		 padlock_use_ace ? "ACE" : "no-ACE");

	/* Register everything or return with an error */
	if (!ENGINE_set_id(e, padlock_id) ||
	    !ENGINE_set_name(e, padlock_name) ||

	    !ENGINE_set_init_function(e, padlock_init) ||
#ifndef OPENSSL_NO_AES
	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
		return 0;
	}

	/* Everything looks good */
	return 1;
}

/* Constructor */
static ENGINE *
ENGINE_padlock(void)
{
	ENGINE *eng = ENGINE_new();

	if (!eng) {
		return NULL;
	}

	if (!padlock_bind_helper(eng)) {
		ENGINE_free(eng);
		return NULL;
	}

	return eng;
}

/* Check availability of the engine */
static int
padlock_init(ENGINE *e)
{
	return (padlock_use_rng || padlock_use_ace);
}

/* This stuff is needed if this ENGINE is being compiled into a
 * self-contained shared library.
 */
#ifdef DYNAMIC_ENGINE
static int
padlock_bind_fn(ENGINE *e, const char *id)
{
	if (id && (strcmp(id, padlock_id) != 0)) {
		return 0;
	}

	if (!padlock_bind_helper(e))  {
		return 0;
	}

	return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
#endif /* DYNAMIC_ENGINE */

/* ===== Here comes the "real" engine ===== */

#ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#define AES_BLOCK_SIZE		16
#define AES_KEY_SIZE_128	16
#define AES_KEY_SIZE_192	24
#define AES_KEY_SIZE_256	32

/* Here we store the status information relevant to the
   current context. */
/* BIG FAT WARNING:
 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
 * 	depends on the order of items in this structure.
 * 	Don't blindly modify, reorder, etc!
 */
struct padlock_cipher_data
{
	unsigned char iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];
		struct {
			int rounds:4;
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;
			int interm:1;
			unsigned int encdec:1;
			int ksize:2;
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};
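
/*
 * For illustration (values as filled in by padlock_aes_init_key()
 * below): a 128-bit key yields cword.b.rounds == 10, ksize == 0 and
 * keygen == 0 (the hardware derives the key schedule itself), while
 * a 256-bit key yields rounds == 14, ksize == 2 and keygen == 1
 * (the key schedule is pre-expanded in software).
 */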

/*
 * Essentially this variable belongs in thread local storage.
 * On the other hand, having this variable global can cause at most
 * a few bogus key reloads [if any at all on a single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#endif

/*
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * The order of arguments is chosen to facilitate the Windows port
 * using the __fastcall calling convention. If you wish to add
 * more routines, keep in mind that the first __fastcall
 * argument is passed in %ecx and the second in %edx.
 * =======================================================
 */
#if defined(__GNUC__) && __GNUC__>=2
/*
 * As for the excessive "push %ebx"/"pop %ebx" found all over:
 * when generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in the "clobber description." Hence the trouble...
 */

/* Helper function - check if a CPUID instruction
   is available on this CPU */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if bit #21 of EFLAGS
	   can be toggled. If yes, CPUID is available. */
	asm volatile (
		"pushf\n"
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"
		"pushl %%eax\n"
		"popf\n"
		"pushf\n"
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"
		"xorl %%eax, %%ecx\n"
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	return (result == 0);
}

/* Load supported features of the CPU to see if
   the PadLock is available. */
static int
padlock_available(void)
{
	char vendor_string[16];
	unsigned int eax, edx;

	/* First check if the CPUID instruction is available at all... */
	if (! padlock_insn_cpuid_available())
		return 0;

	/* Are we running on the Centaur (VIA) CPU? */
	eax = 0x00000000;
	vendor_string[12] = 0;
	asm volatile (
		"pushl	%%ebx\n"
		"cpuid\n"
		"movl	%%ebx,(%%edi)\n"
		"movl	%%edx,4(%%edi)\n"
		"movl	%%ecx,8(%%edi)\n"
		"popl	%%ebx"
		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
	if (strcmp(vendor_string, "CentaurHauls") != 0)
		return 0;

	/* Check for Centaur Extended Feature Flags presence */
	eax = 0xC0000000;
	asm volatile ("pushl %%ebx; cpuid; popl	%%ebx"
		: "+a"(eax) : : "ecx", "edx");
	if (eax < 0xC0000001)
		return 0;

	/* Read the Centaur Extended Feature Flags */
	eax = 0xC0000001;
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax), "=d"(edx) : : "ecx");

	/* Fill in the feature flags: bits 6 and 7 signal that ACE is
	   present and enabled, bits 2 and 3 the same for the RNG. */
	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));

	return padlock_use_ace + padlock_use_rng;
}

#ifndef OPENSSL_NO_AES
/* Our own htonl()/ntohl() */
static inline void
padlock_bswapl(AES_KEY *ks)
{
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned int *key = ks->rd_key;

	while (i--) {
		asm volatile ("bswapl %0" : "+r"(*key));
		key++;
	}
}
#endif

/* Force key reload from memory to the CPU microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick. */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}

#ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. The point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"
"	btl	$30,(%%esp)\n"
"	jnc	1f\n"
"	cmpl	%2,%1\n"
"	je	1f\n"
"	popfl\n"
"	subl	$4,%%esp\n"
"1:	addl	$4,%%esp\n"
"	movl	%2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}

/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
{	void *iv; 				\
	asm volatile ( "pushl	%%ebx\n"	\
		"	leal	16(%0),%%edx\n"	\
		"	leal	32(%0),%%ebx\n"	\
			rep_xcrypt "\n"		\
		"	popl	%%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
		: "edx", "cc", "memory");	\
	return iv;				\
}

/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
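
/*
 * For reference, each line above defines a function with a prototype
 * along the lines of
 *
 *	static inline void *padlock_xcrypt_ecb(size_t cnt,
 *		struct padlock_cipher_data *cdata,
 *		void *out, const void *inp);
 *
 * whose return value is used by the CBC/CFB callers below as a
 * pointer to the IV that the hardware left behind.
 */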
#endif

/* The RNG call itself */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}

/* Why not inline 'rep movsd'? I failed to find information on what
 * value of the Direction Flag one can expect and consequently have to
 * apply the "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder, the 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and the pointers advance toward
 * larger addresses unconditionally.
 */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long       *d=dst;
	const long *s=src;

	n /= sizeof(*d);
	do { *d++ = *s++; } while (--n);

	return dst;
}

#elif defined(_MSC_VER)
/*
 * Unlike the GCC variants above, these are real functions. To
 * minimize the impact on performance we adhere to the __fastcall
 * calling convention so that the first two arguments are passed
 * through %ecx and %edx. Which suits very well, as the instructions
 * in question use both %ecx and %edx as input:-)
 */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	void *outp, const void *inp)	\
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd		}

static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
		pushfd
		bt	DWORD PTR[esp],30
		jnc	skip
		cmp	ecx,padlock_saved_context
		je	skip
		popfd
		sub	esp,4
	skip:	add	esp,4
		mov	padlock_saved_context,ecx
		}
}

static int
padlock_available(void)
{	_asm	{
		pushfd
		pop	eax
		mov	ecx,eax
		xor	eax,1<<21
		push	eax
		popfd
		pushfd
		pop	eax
		xor	eax,ecx
		bt	eax,21
		jnc	noluck
		mov	eax,0
		cpuid
		xor	eax,eax
		cmp	ebx,'tneC'
		jne	noluck
		cmp	edx,'Hrua'
		jne	noluck
		cmp	ecx,'slua'
		jne	noluck
		mov	eax,0xC0000000
		cpuid
		mov	edx,eax
		xor	eax,eax
		cmp	edx,0xC0000001
		jb	noluck
		mov	eax,0xC0000001
		cpuid
		xor	eax,eax
		bt	edx,6
		jnc	skip_a
		bt	edx,7
		jnc	skip_a
		mov	padlock_use_ace,1
		inc	eax
	skip_a:	bt	edx,2
		jnc	skip_r
		bt	edx,3
		jnc	skip_r
		mov	padlock_use_rng,1
		inc	eax
	skip_r:
	noluck:
		}
}

static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
		pushfd
		cld
		mov	esi,ecx
		mov	edi,ecx
		mov	ecx,60
	up:	lodsd
		bswap	eax
		stosd
		loop	up
		popfd
		}
}

/* MS actually specifies the status of the Direction Flag, and the
 * compiler even manages to compile the following as 'rep movsd' all
 * by itself...
 */
#define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#endif

/* ===== AES encryption/decryption ===== */
#ifndef OPENSSL_NO_AES

#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#define NID_aes_128_cfb	NID_aes_128_cfb128
#endif

#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#define NID_aes_128_ofb	NID_aes_128_ofb128
#endif

#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#define NID_aes_192_cfb	NID_aes_192_cfb128
#endif

#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#define NID_aes_192_ofb	NID_aes_192_ofb128
#endif

#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#define NID_aes_256_cfb	NID_aes_256_cfb128
#endif

#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#define NID_aes_256_ofb	NID_aes_256_ofb128
#endif

/* List of supported ciphers. */
static int padlock_cipher_nids[] = {
	NID_aes_128_ecb,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_ofb,

	NID_aes_192_ecb,
	NID_aes_192_cbc,
	NID_aes_192_cfb,
	NID_aes_192_ofb,

	NID_aes_256_ecb,
	NID_aes_256_cbc,
	NID_aes_256_cfb,
	NID_aes_256_ofb,
};
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
				const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			      const unsigned char *in, size_t nbytes);

#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +		\
	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )	)
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))
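
/*
 * Worked example: for a cipher_data pointer ending in ...8a4 the
 * expression above yields ...8b0, i.e. the next 16-byte boundary,
 * while an already aligned pointer is returned unchanged. This is
 * why the EVP declarations below request
 * sizeof(struct padlock_cipher_data) + 16 bytes of cipher_data.
 */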

#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1

/* Declaring so many ciphers by hand would be a pain.
   Instead we introduce a bit of preprocessor magic :-) */
#define	DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	EVP_CIPHER_block_size_##umode,	\
	AES_KEY_SIZE_##ksize,		\
	AES_BLOCK_SIZE,			\
	0 | EVP_CIPH_##umode##_MODE,	\
	padlock_aes_init_key,		\
	padlock_aes_cipher,		\
	NULL,				\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,		\
	EVP_CIPHER_get_asn1_iv,		\
	NULL,				\
	NULL				\
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
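
/*
 * For reference, DECLARE_AES_EVP(128,cbc,CBC) above expands to an
 * EVP_CIPHER roughly equivalent to
 *
 *	static const EVP_CIPHER padlock_aes_128_cbc = {
 *		NID_aes_128_cbc, 16, 16, 16,
 *		0 | EVP_CIPH_CBC_MODE,
 *		padlock_aes_init_key, padlock_aes_cipher, NULL,
 *		sizeof(struct padlock_cipher_data) + 16,
 *		EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
 *		NULL, NULL
 *	};
 */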

static int
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
{
	/* No specific cipher => return a list of supported nids ... */
	if (!cipher) {
		*nids = padlock_cipher_nids;
		return padlock_cipher_nids_num;
	}

	/* ... or the requested "cipher" otherwise */
	switch (nid) {
	  case NID_aes_128_ecb:
	    *cipher = &padlock_aes_128_ecb;
	    break;
	  case NID_aes_128_cbc:
	    *cipher = &padlock_aes_128_cbc;
	    break;
	  case NID_aes_128_cfb:
	    *cipher = &padlock_aes_128_cfb;
	    break;
	  case NID_aes_128_ofb:
	    *cipher = &padlock_aes_128_ofb;
	    break;

	  case NID_aes_192_ecb:
	    *cipher = &padlock_aes_192_ecb;
	    break;
	  case NID_aes_192_cbc:
	    *cipher = &padlock_aes_192_cbc;
	    break;
	  case NID_aes_192_cfb:
	    *cipher = &padlock_aes_192_cfb;
	    break;
	  case NID_aes_192_ofb:
	    *cipher = &padlock_aes_192_ofb;
	    break;

	  case NID_aes_256_ecb:
	    *cipher = &padlock_aes_256_ecb;
	    break;
	  case NID_aes_256_cbc:
	    *cipher = &padlock_aes_256_cbc;
	    break;
	  case NID_aes_256_cfb:
	    *cipher = &padlock_aes_256_cfb;
	    break;
	  case NID_aes_256_ofb:
	    *cipher = &padlock_aes_256_ofb;
	    break;

	  default:
	    /* Sorry, we don't support this NID */
	    *cipher = NULL;
	    return 0;
	}

	return 1;
}

/* Prepare the encryption key for PadLock usage */
static int
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
		      const unsigned char *iv, int enc)
{
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

	if (key==NULL) return 0;	/* ERROR */

	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
	cdata->cword.b.ksize = (key_len - 128) / 64;

	switch(key_len) {
		case 128:
			/* PadLock can generate an extended key for
			   AES128 in hardware */
			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
			cdata->cword.b.keygen = 0;
			break;

		case 192:
		case 256:
			/* Generate an extended AES key in software.
			   Needed for AES192/AES256 */
			/* Well, the above applies to Stepping 8 CPUs
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
			    enc)
				AES_set_encrypt_key(key, key_len, &cdata->ks);
			else
				AES_set_decrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
			/* OpenSSL C functions use a byte-swapped extended key. */
			padlock_bswapl(&cdata->ks);
#endif
			cdata->cword.b.keygen = 1;
			break;

		default:
			/* ERROR */
			return 0;
	}

	/*
	 * This is done to cover cases where the user reuses the
	 * context with a new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with the old key...
	 */
	padlock_reload_key ();

	return 1;
}

/*
 * Simplified version of padlock_aes_cipher() used when
 * 1) both the input and output buffers are at aligned addresses, or
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	void  *iv;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}

#ifndef  PADLOCK_CHUNK
# define PADLOCK_CHUNK	512	/* Must be a power of 2 no smaller than 16 */
#endif
#if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
# error "insane PADLOCK_CHUNK..."
#endif

/* Re-align the arguments to 16-byte boundaries and run the
   encryption function itself. This function is not AES-specific. */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		   const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	const  void *inp;
	unsigned char  *out;
	void  *iv;
	int    inp_misaligned, out_misaligned, realign_in_loop;
	size_t chunk, allocated=0;

	/* ctx->num is maintained in byte-oriented modes,
	   such as CFB and OFB... */
	if ((chunk = ctx->num)) { /* borrow chunk variable */
		unsigned char *ivp=ctx->iv;

		switch (EVP_CIPHER_CTX_mode(ctx)) {
		case EVP_CIPH_CFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			if (ctx->encrypt)
				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
					chunk++, nbytes--;
				}
			else	while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ ivp[chunk];
					ivp[chunk++] = c, nbytes--;
				}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		case EVP_CIPH_OFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
				chunk++, nbytes--;
			}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		}
	}

	if (nbytes == 0)
		return 1;
#if 0
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
#else
	/* nbytes is always a multiple of AES_BLOCK_SIZE in ECB and CBC
	   modes and an arbitrary value in byte-oriented modes, such as
	   CFB and OFB... */
#endif

	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	/* The C7 core is capable of managing unaligned input in
	   non-ECB[!] mode, but the performance penalty appears to be
	   approximately the same as for the software alignment below,
	   i.e. ~3x. They promise to improve it in the future, but for
	   now we can just as well pretend that it can only handle
	   aligned input... */
	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);

	/* Note that even if the output is aligned and the input is not,
	 * I still prefer to loop instead of copying the whole
	 * input and then encrypting in one stroke. This is done
	 * in order to improve L1 cache utilization... */
	realign_in_loop = out_misaligned|inp_misaligned;

	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	/* this takes one "if" out of the loops */
	chunk  = nbytes;
	chunk %= PADLOCK_CHUNK;
	if (chunk==0) chunk = PADLOCK_CHUNK;
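
	/*
	 * Worked example (illustration only): with nbytes == 1300 and
	 * PADLOCK_CHUNK == 512 the loops below process 276 + 512 + 512
	 * bytes, i.e. the odd-sized piece goes first and every further
	 * iteration handles a full PADLOCK_CHUNK.
	 */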

	if (out_misaligned) {
		/* optimize for small input */
		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
		out = alloca(0x10 + allocated);
		out = NEAREST_ALIGNED(out);
	}
	else
		out = out_arg;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		goto cbc_shortcut;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cbc_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk)	goto cfb_shortcut;
		else		goto cfb_skiploop;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cfb_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
		} while (nbytes >= AES_BLOCK_SIZE);

		cfb_skiploop:
		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			if (iv != ivp) {
				memcpy(ivp, iv, AES_BLOCK_SIZE);
				iv = ivp;
			}
			ctx->num = nbytes;
			if (cdata->cword.b.encdec) {
				cdata->cword.b.encdec=0;
				padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				cdata->cword.b.encdec=1;
				padlock_reload_key();
				while(nbytes) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ *ivp;
					*(ivp++) = c, nbytes--;
				}
			}
			else {	padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				padlock_reload_key();
				while (nbytes) {
					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
					ivp++, nbytes--;
				}
			}
		}

		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes >= AES_BLOCK_SIZE);

		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			ctx->num = nbytes;
			padlock_reload_key();	/* empirically found */
			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
			padlock_reload_key();	/* empirically found */
			while (nbytes) {
				*(out_arg++) = *(in_arg++) ^ *ivp;
				ivp++, nbytes--;
			}
		}

		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Clean the realign buffer if it was used */
	if (out_misaligned) {
		volatile unsigned long *p=(void *)out;
		size_t   n = allocated/sizeof(*p);
		while (n--) *p++=0;
	}

	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}

#endif /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
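/*
 * For orientation (a reading inferred from the checks in
 * padlock_rand_bytes() below): in the status word that 'xstore'
 * returns in %eax, bits [4:0] hold the number of bytes actually
 * stored, bit 6 tells whether the RNG is enabled at all, and bits
 * [14:10] are failure indicators covering DC bias, Raw Bits and the
 * String Filter.
 */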
/* Wrapper that provides an interface between the API and
   the raw PadLock RNG */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int eax, buf;

	while (count >= 8) {
		eax = padlock_xstore(output, 0);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=8)	return 0; /* fatal failure...  */
		output += 8;
		count  -= 8;
	}
	while (count > 0) {
		eax = padlock_xstore(&buf, 3);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=1)	return 0; /* fatal failure...  */
		*output++ = (unsigned char)buf;
		count--;
	}
	*(volatile unsigned int *)&buf=0;

	return 1;
}

/* Dummy but necessary function */
static int
padlock_rand_status(void)
{
	return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
	NULL,			/* seed */
	padlock_rand_bytes,	/* bytes */
	NULL,			/* cleanup */
	NULL,			/* add */
	padlock_rand_bytes,	/* pseudorand */
	padlock_rand_status,	/* rand status */
};

#else  /* !COMPILE_HW_PADLOCK */
#ifndef OPENSSL_NO_DYNAMIC_ENGINE
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
IMPLEMENT_DYNAMIC_CHECK_FN()
#endif
#endif /* COMPILE_HW_PADLOCK */

#endif /* !OPENSSL_NO_HW_PADLOCK */
#endif /* !OPENSSL_NO_HW */