xref: /onnv-gate/usr/src/common/crypto/aes/amd64/aesopt.h (revision 7421:8b7f030a1d82)
16877Sda73024 /*
26877Sda73024  * ---------------------------------------------------------------------------
36877Sda73024  * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
46877Sda73024  *
56877Sda73024  * LICENSE TERMS
66877Sda73024  *
76877Sda73024  * The free distribution and use of this software is allowed (with or without
86877Sda73024  * changes) provided that:
96877Sda73024  *
106877Sda73024  *  1. source code distributions include the above copyright notice, this
116877Sda73024  *	list of conditions and the following disclaimer;
126877Sda73024  *
136877Sda73024  *  2. binary distributions include the above copyright notice, this list
146877Sda73024  *	of conditions and the following disclaimer in their documentation;
156877Sda73024  *
166877Sda73024  *  3. the name of the copyright holder is not used to endorse products
176877Sda73024  *	built using this software without specific written permission.
186877Sda73024  *
196877Sda73024  * DISCLAIMER
206877Sda73024  *
216877Sda73024  * This software is provided 'as is' with no explicit or implied warranties
226877Sda73024  * in respect of its properties, including, but not limited to, correctness
236877Sda73024  * and/or fitness for purpose.
246877Sda73024  * ---------------------------------------------------------------------------
256877Sda73024  * Issue Date: 20/12/2007
266877Sda73024  *
276877Sda73024  * This file contains the compilation options for AES (Rijndael) and code
286877Sda73024  * that is common across encryption, key scheduling and table generation.
296877Sda73024  *
306877Sda73024  * OPERATION
316877Sda73024  *
326877Sda73024  * These source code files implement the AES algorithm Rijndael designed by
336877Sda73024  * Joan Daemen and Vincent Rijmen. This version is designed for the standard
346877Sda73024  * block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
356877Sda73024  * and 32 bytes).
366877Sda73024  *
376877Sda73024  * This version is designed for flexibility and speed using operations on
386877Sda73024  * 32-bit words rather than operations on bytes.  It can be compiled with
396877Sda73024  * either big or little endian internal byte order but is faster when the
406877Sda73024  * native byte order for the processor is used.
416877Sda73024  *
426877Sda73024  * THE CIPHER INTERFACE
436877Sda73024  *
446877Sda73024  * The cipher interface is implemented as an array of bytes in which lower
456877Sda73024  * AES bit sequence indexes map to higher numeric significance within bytes.
466877Sda73024  */
476877Sda73024 
486877Sda73024 /*
496877Sda73024  * OpenSolaris changes
506877Sda73024  * 1. Added __cplusplus and _AESOPT_H header guards
516877Sda73024  * 2. Added header files sys/types.h and aes_impl.h
526877Sda73024  * 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C
536877Sda73024  * 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER
546877Sda73024  *    from brg_endian.h
556877Sda73024  * 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT
566877Sda73024  * 6. Changed uint_8t and uint_32t to uint8_t and uint32_t
57*7421SDaniel.Anderson@Sun.COM  * 7. Defined aes_sw32 as htonl() for byte swapping
58*7421SDaniel.Anderson@Sun.COM  * 8. Cstyled and hdrchk code
596877Sda73024  *
606877Sda73024  */
616877Sda73024 
626877Sda73024 #ifndef _AESOPT_H
636877Sda73024 #define	_AESOPT_H
646877Sda73024 
656877Sda73024 #ifdef	__cplusplus
666877Sda73024 extern "C" {
676877Sda73024 #endif
686877Sda73024 
696877Sda73024 #include <sys/types.h>
70*7421SDaniel.Anderson@Sun.COM #include <sys/byteorder.h>
716877Sda73024 #include <aes_impl.h>
726877Sda73024 
736877Sda73024 /*  SUPPORT FEATURES */
746877Sda73024 #define	AES_ENCRYPT /* if support for encryption is needed */
756877Sda73024 #define	AES_DECRYPT /* if support for decryption is needed */
766877Sda73024 
776877Sda73024 /*  PLATFORM-SPECIFIC FEATURES */
786877Sda73024 #define	IS_BIG_ENDIAN		4321 /* byte 0 is most significant (mc68k) */
796877Sda73024 #define	IS_LITTLE_ENDIAN	1234 /* byte 0 is least significant (i386) */
806877Sda73024 #define	PLATFORM_BYTE_ORDER	IS_LITTLE_ENDIAN
816877Sda73024 #define	AES_REV_DKS /* define to reverse decryption key schedule */
826877Sda73024 
836877Sda73024 
846877Sda73024 /*
856877Sda73024  *  CONFIGURATION - THE USE OF DEFINES
866877Sda73024  *	Later in this section there are a number of defines that control the
876877Sda73024  *	operation of the code.  In each section, the purpose of each define is
886877Sda73024  *	explained so that the relevant form can be included or excluded by
896877Sda73024  *	setting either 1's or 0's respectively on the branches of the related
906877Sda73024  *	#if clauses.  The following local defines should not be changed.
916877Sda73024  */
926877Sda73024 
936877Sda73024 #define	ENCRYPTION_IN_C	1
946877Sda73024 #define	DECRYPTION_IN_C	2
956877Sda73024 #define	ENC_KEYING_IN_C	4
966877Sda73024 #define	DEC_KEYING_IN_C	8
976877Sda73024 
986877Sda73024 #define	NO_TABLES	0
996877Sda73024 #define	ONE_TABLE	1
1006877Sda73024 #define	FOUR_TABLES	4
1016877Sda73024 #define	NONE		0
1026877Sda73024 #define	PARTIAL		1
1036877Sda73024 #define	FULL		2
1046877Sda73024 
1056877Sda73024 /*  --- START OF USER CONFIGURED OPTIONS --- */
1066877Sda73024 
1076877Sda73024 /*
1086877Sda73024  *  1. BYTE ORDER WITHIN 32 BIT WORDS
1096877Sda73024  *
1106877Sda73024  *	The fundamental data processing units in Rijndael are 8-bit bytes. The
1116877Sda73024  *	input, output and key input are all enumerated arrays of bytes in which
1126877Sda73024  *	bytes are numbered starting at zero and increasing to one less than the
1136877Sda73024  *	number of bytes in the array in question. This enumeration is only used
1146877Sda73024  *	for naming bytes and does not imply any adjacency or order relationship
1156877Sda73024  *	from one byte to another. When these inputs and outputs are considered
1166877Sda73024  *	as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
1176877Sda73024  *	byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
1186877Sda73024  *	In this implementation bits are numbered from 0 to 7 starting at the
1196877Sda73024  *	numerically least significant end of each byte.  Bit n represents 2^n.
1206877Sda73024  *
1216877Sda73024  *	However, Rijndael can be implemented more efficiently using 32-bit
1226877Sda73024  *	words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
1236877Sda73024  *	into word[n]. While in principle these bytes can be assembled into words
1246877Sda73024  *	in any positions, this implementation only supports the two formats in
1256877Sda73024  *	which bytes in adjacent positions within words also have adjacent byte
1266877Sda73024  *	numbers. This order is called big-endian if the lowest numbered bytes
1276877Sda73024  *	in words have the highest numeric significance and little-endian if the
1286877Sda73024  *	opposite applies.
1296877Sda73024  *
1306877Sda73024  *	This code can work in either order irrespective of the order used by the
1316877Sda73024  *	machine on which it runs. Normally the internal byte order will be set
1326877Sda73024  *	to the order of the processor on which the code is to be run but this
1336877Sda73024  *	define	can be used to reverse this in special situations
1346877Sda73024  *
1356877Sda73024  *	WARNING: Assembler code versions require ALGORITHM_BYTE_ORDER to equal
1366877Sda73024  *	PLATFORM_BYTE_ORDER, so it is reset after the user options if necessary
1376877Sda73024  */
1386877Sda73024 
1396877Sda73024 #if 1
1406877Sda73024 #define	ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
1416877Sda73024 #elif 0
1426877Sda73024 #define	ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
1436877Sda73024 #elif 0
1446877Sda73024 #define	ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
1456877Sda73024 #else
1466877Sda73024 #error The algorithm byte order is not defined
1476877Sda73024 #endif
1486877Sda73024 
1496877Sda73024 /*  2. VIA ACE SUPPORT */
1506877Sda73024 
1516877Sda73024 #if defined(__GNUC__) && defined(__i386__) || \
1526877Sda73024 	defined(_WIN32) && defined(_M_IX86) && \
1536877Sda73024 	!(defined(_WIN64) || defined(_WIN32_WCE) || \
1546877Sda73024 	defined(_MSC_VER) && (_MSC_VER <= 800))
1556877Sda73024 #define	VIA_ACE_POSSIBLE
1566877Sda73024 #endif
1576877Sda73024 
1586877Sda73024 /*
1596877Sda73024  *  Define this option if support for the VIA ACE is required. This uses
1606877Sda73024  *  inline assembler instructions and is only implemented for the Microsoft,
1616877Sda73024  *  Intel and GCC compilers.  If VIA ACE is known to be present, then defining
1626877Sda73024  *  ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
1636877Sda73024  *  code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
1646877Sda73024  *  it is detected (both present and enabled) but the normal AES code will
1656877Sda73024  *  also be present.
1666877Sda73024  *
1676877Sda73024  *  When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
1686877Sda73024  *  aligned; other input/output buffers do not need to be 16 byte aligned
1696877Sda73024  *  but there are very large performance gains if this can be arranged.
1706877Sda73024  *  VIA ACE also requires the decryption key schedule to be in reverse
1716877Sda73024  *  order (which later checks below ensure).
1726877Sda73024  */
1736877Sda73024 
1746877Sda73024 /*  VIA ACE is not used here for OpenSolaris: */
1756877Sda73024 #undef	VIA_ACE_POSSIBLE
1766877Sda73024 #undef	ASSUME_VIA_ACE_PRESENT
1776877Sda73024 
1786877Sda73024 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT)
1796877Sda73024 #define	USE_VIA_ACE_IF_PRESENT
1806877Sda73024 #endif
1816877Sda73024 
1826877Sda73024 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT)
1836877Sda73024 #define	ASSUME_VIA_ACE_PRESENT
1846877Sda73024 #endif
1856877Sda73024 
1866877Sda73024 
1876877Sda73024 /*
1886877Sda73024  *  3. ASSEMBLER SUPPORT
1896877Sda73024  *
1906877Sda73024  *	This define (which can be on the command line) enables the use of the
1916877Sda73024  *	assembler code routines for encryption, decryption and key scheduling
1926877Sda73024  *	as follows:
1936877Sda73024  *
1946877Sda73024  *	ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
1956877Sda73024  *		encryption and decryption but with key scheduling in C
1966877Sda73024  *	ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
1976877Sda73024  *		encryption, decryption and key scheduling
1986877Sda73024  *	ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
1996877Sda73024  *		encryption and decryption but with key scheduling in C
2006877Sda73024  *	ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
2016877Sda73024  *		encryption and decryption but with key scheduling in C
2026877Sda73024  *
2036877Sda73024  *	Change one 'if 0' below to 'if 1' to select the version or define
2046877Sda73024  *	as a compilation option.
2056877Sda73024  */
2066877Sda73024 
2076877Sda73024 #if 0 && !defined(ASM_X86_V1C)
2086877Sda73024 #define	ASM_X86_V1C
2096877Sda73024 #elif 0 && !defined(ASM_X86_V2)
2106877Sda73024 #define	ASM_X86_V2
2116877Sda73024 #elif 0 && !defined(ASM_X86_V2C)
2126877Sda73024 #define	ASM_X86_V2C
2136877Sda73024 #elif 1 && !defined(ASM_AMD64_C)
2146877Sda73024 #define	ASM_AMD64_C
2156877Sda73024 #endif
2166877Sda73024 
2176877Sda73024 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \
2186877Sda73024 	!defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \
2196877Sda73024 	!defined(__amd64)
2206877Sda73024 #error Assembler code is only available for x86 and AMD64 systems
2216877Sda73024 #endif
2226877Sda73024 
2236877Sda73024 /*
2246877Sda73024  *  4. FAST INPUT/OUTPUT OPERATIONS.
2256877Sda73024  *
2266877Sda73024  *	On some machines it is possible to improve speed by transferring the
2276877Sda73024  *	bytes in the input and output arrays to and from the internal 32-bit
2286877Sda73024  *	variables by addressing these arrays as if they are arrays of 32-bit
2296877Sda73024  *	words.  On some machines this will always be possible but there may
2306877Sda73024  *	be a large performance penalty if the byte arrays are not aligned on
2316877Sda73024  *	the normal word boundaries. On other machines this technique will
2326877Sda73024  *	lead to memory access errors when such 32-bit word accesses are not
2336877Sda73024  *	properly aligned. The option SAFE_IO avoids such problems but will
2346877Sda73024  *	often be slower on those machines that support misaligned access
2356877Sda73024  *	(especially so if care is taken to align the input  and output byte
2366877Sda73024  *	arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
2376877Sda73024  *	assumed that access to byte arrays as if they are arrays of 32-bit
2386877Sda73024  *	words will not cause problems when such accesses are misaligned.
2396877Sda73024  */
2406877Sda73024 #if 1 && !defined(_MSC_VER)
2416877Sda73024 #define	SAFE_IO
2426877Sda73024 #endif
2436877Sda73024 
2446877Sda73024 /*
2456877Sda73024  *  5. LOOP UNROLLING
2466877Sda73024  *
2476877Sda73024  *	The code for encryption and decryption cycles through a number of rounds
2486877Sda73024  *	that can be implemented either in a loop or by expanding the code into a
2496877Sda73024  *	long sequence of instructions, the latter producing a larger program but
2506877Sda73024  *	one that will often be much faster. The latter is called loop unrolling.
2516877Sda73024  *	There are also potential speed advantages in expanding two iterations in
2526877Sda73024  *	a loop with half the number of iterations, which is called partial loop
2536877Sda73024  *	unrolling.  The following options allow partial or full loop unrolling
2546877Sda73024  *	to be set independently for encryption and decryption
2556877Sda73024  */
2566877Sda73024 #if 1
2576877Sda73024 #define	ENC_UNROLL  FULL
2586877Sda73024 #elif 0
2596877Sda73024 #define	ENC_UNROLL  PARTIAL
2606877Sda73024 #else
2616877Sda73024 #define	ENC_UNROLL  NONE
2626877Sda73024 #endif
2636877Sda73024 
2646877Sda73024 #if 1
2656877Sda73024 #define	DEC_UNROLL  FULL
2666877Sda73024 #elif 0
2676877Sda73024 #define	DEC_UNROLL  PARTIAL
2686877Sda73024 #else
2696877Sda73024 #define	DEC_UNROLL  NONE
2706877Sda73024 #endif
2716877Sda73024 
2726877Sda73024 #if 1
2736877Sda73024 #define	ENC_KS_UNROLL
2746877Sda73024 #endif
2756877Sda73024 
2766877Sda73024 #if 1
2776877Sda73024 #define	DEC_KS_UNROLL
2786877Sda73024 #endif
2796877Sda73024 
2806877Sda73024 /*
2816877Sda73024  *  6. FAST FINITE FIELD OPERATIONS
2826877Sda73024  *
2836877Sda73024  *	If this section is included, tables are used to provide faster finite
2846877Sda73024  *	field arithmetic.  This has no effect if FIXED_TABLES is defined.
2856877Sda73024  */
2866877Sda73024 #if 1
2876877Sda73024 #define	FF_TABLES
2886877Sda73024 #endif
2896877Sda73024 
2906877Sda73024 /*
2916877Sda73024  *  7. INTERNAL STATE VARIABLE FORMAT
2926877Sda73024  *
2936877Sda73024  *	The internal state of Rijndael is stored in a number of local 32-bit
2946877Sda73024  *	word variables which can be defined either as an array or as individual
2956877Sda73024  *	named variables. Include this section if you want to store these local
2966877Sda73024  *	variables in arrays. Otherwise individual local variables will be used.
2976877Sda73024  */
2986877Sda73024 #if 1
2996877Sda73024 #define	ARRAYS
3006877Sda73024 #endif
3016877Sda73024 
3026877Sda73024 /*
3036877Sda73024  *  8. FIXED OR DYNAMIC TABLES
3046877Sda73024  *
3056877Sda73024  *	When this section is included the tables used by the code are compiled
3066877Sda73024  *	statically into the binary file.  Otherwise the subroutine aes_init()
3076877Sda73024  *	must be called to compute them before the code is first used.
3086877Sda73024  */
3096877Sda73024 #if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800))
3106877Sda73024 #define	FIXED_TABLES
3116877Sda73024 #endif
3126877Sda73024 
3136877Sda73024 /*
3146877Sda73024  *  9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
3156877Sda73024  *
3166877Sda73024  *	In some systems it is better to mask longer values to extract bytes
3176877Sda73024  *	rather than using a cast. This option allows this choice.
3186877Sda73024  */
3196877Sda73024 #if 0
3206877Sda73024 #define	to_byte(x)  ((uint8_t)(x))
3216877Sda73024 #else
3226877Sda73024 #define	to_byte(x)  ((x) & 0xff)
3236877Sda73024 #endif
3246877Sda73024 
3256877Sda73024 /*
3266877Sda73024  *  10. TABLE ALIGNMENT
3276877Sda73024  *
3286877Sda73024  *	On some systems speed will be improved by aligning the AES large lookup
3296877Sda73024  *	tables on particular boundaries. This define should be set to a power of
3306877Sda73024  *	two giving the desired alignment. It can be left undefined if alignment
3316877Sda73024  *	is not needed.  This option is specific to the Microsoft VC++ compiler -
3326877Sda73024  *	it seems to sometimes cause trouble for the VC++ version 6 compiler.
3336877Sda73024  */
3346877Sda73024 
3356877Sda73024 #if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300)
3366877Sda73024 #define	TABLE_ALIGN 32
3376877Sda73024 #endif
3386877Sda73024 
3396877Sda73024 /*
3406877Sda73024  *  11.  REDUCE CODE AND TABLE SIZE
3416877Sda73024  *
3426877Sda73024  *	This replaces some expanded macros with function calls if ASM_X86_V2 or
3436877Sda73024  *	ASM_X86_V2C are defined
3446877Sda73024  */
3456877Sda73024 
3466877Sda73024 #if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))
3476877Sda73024 #define	REDUCE_CODE_SIZE
3486877Sda73024 #endif
3496877Sda73024 
3506877Sda73024 /*
3516877Sda73024  *  12. TABLE OPTIONS
3526877Sda73024  *
3536877Sda73024  *	This cipher proceeds by repeating in a number of cycles known as rounds
3546877Sda73024  *	which are implemented by a round function which can optionally be speeded
3556877Sda73024  *	up using tables.  The basic tables are 256 32-bit words, with either
3566877Sda73024  *	one or four tables being required for each round function depending on
3576877Sda73024  *	how much speed is required. Encryption and decryption round functions
3586877Sda73024  *	are different and the last encryption and decryption round functions are
3596877Sda73024  *	different again making four different round functions in all.
3606877Sda73024  *
3616877Sda73024  *	This means that:
3626877Sda73024  *	1. Normal encryption and decryption rounds can each use either 0, 1
3636877Sda73024  *		or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
3646877Sda73024  *	2. The last encryption and decryption rounds can also use either 0, 1
3656877Sda73024  *		or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
3666877Sda73024  *
3676877Sda73024  *	Include or exclude the appropriate definitions below to set the number
3686877Sda73024  *	of tables used by this implementation.
3696877Sda73024  */
3706877Sda73024 
3716877Sda73024 #if 1   /* set tables for the normal encryption round */
3726877Sda73024 #define	ENC_ROUND   FOUR_TABLES
3736877Sda73024 #elif 0
3746877Sda73024 #define	ENC_ROUND   ONE_TABLE
3756877Sda73024 #else
3766877Sda73024 #define	ENC_ROUND   NO_TABLES
3776877Sda73024 #endif
3786877Sda73024 
3796877Sda73024 #if 1   /* set tables for the last encryption round */
3806877Sda73024 #define	LAST_ENC_ROUND  FOUR_TABLES
3816877Sda73024 #elif 0
3826877Sda73024 #define	LAST_ENC_ROUND  ONE_TABLE
3836877Sda73024 #else
3846877Sda73024 #define	LAST_ENC_ROUND  NO_TABLES
3856877Sda73024 #endif
3866877Sda73024 
3876877Sda73024 #if 1   /* set tables for the normal decryption round */
3886877Sda73024 #define	DEC_ROUND   FOUR_TABLES
3896877Sda73024 #elif 0
3906877Sda73024 #define	DEC_ROUND   ONE_TABLE
3916877Sda73024 #else
3926877Sda73024 #define	DEC_ROUND   NO_TABLES
3936877Sda73024 #endif
3946877Sda73024 
3956877Sda73024 #if 1   /* set tables for the last decryption round */
3966877Sda73024 #define	LAST_DEC_ROUND  FOUR_TABLES
3976877Sda73024 #elif 0
3986877Sda73024 #define	LAST_DEC_ROUND  ONE_TABLE
3996877Sda73024 #else
4006877Sda73024 #define	LAST_DEC_ROUND  NO_TABLES
4016877Sda73024 #endif
4026877Sda73024 
4036877Sda73024 /*
4046877Sda73024  *  The decryption key schedule can be speeded up with tables in the same
4056877Sda73024  *	way that the round functions can.  Include or exclude the following
4066877Sda73024  *	defines to set this requirement.
4076877Sda73024  */
4086877Sda73024 #if 1
4096877Sda73024 #define	KEY_SCHED   FOUR_TABLES
4106877Sda73024 #elif 0
4116877Sda73024 #define	KEY_SCHED   ONE_TABLE
4126877Sda73024 #else
4136877Sda73024 #define	KEY_SCHED   NO_TABLES
4146877Sda73024 #endif
4156877Sda73024 
4166877Sda73024 /*  ---- END OF USER CONFIGURED OPTIONS ---- */
4176877Sda73024 
4186877Sda73024 /* VIA ACE support is only available for VC++ and GCC */
4196877Sda73024 
4206877Sda73024 #if !defined(_MSC_VER) && !defined(__GNUC__)
4216877Sda73024 #if defined(ASSUME_VIA_ACE_PRESENT)
4226877Sda73024 #undef ASSUME_VIA_ACE_PRESENT
4236877Sda73024 #endif
4246877Sda73024 #if defined(USE_VIA_ACE_IF_PRESENT)
4256877Sda73024 #undef USE_VIA_ACE_IF_PRESENT
4266877Sda73024 #endif
4276877Sda73024 #endif
4286877Sda73024 
4296877Sda73024 #if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT)
4306877Sda73024 #define	USE_VIA_ACE_IF_PRESENT
4316877Sda73024 #endif
4326877Sda73024 
4336877Sda73024 #if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS)
4346877Sda73024 #define	AES_REV_DKS
4356877Sda73024 #endif
4366877Sda73024 
4376877Sda73024 /* Assembler support requires the use of platform byte order */
4386877Sda73024 
4396877Sda73024 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \
4406877Sda73024 	(ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
4416877Sda73024 #undef  ALGORITHM_BYTE_ORDER
4426877Sda73024 #define	ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
4436877Sda73024 #endif
4446877Sda73024 
4456877Sda73024 /*
4466877Sda73024  * In this implementation the columns of the state array are each held in
4476877Sda73024  *	32-bit words. The state array can be held in various ways: in an array
4486877Sda73024  *	of words, in a number of individual word variables or in a number of
4496877Sda73024  *	processor registers. The following define maps a variable name x and
4506877Sda73024  *	a column number c to the way the state array variable is to be held.
4516877Sda73024  *	The first define below maps the state into an array x[c] whereas the
4526877Sda73024  *	second form maps the state into a number of individual variables x0,
4536877Sda73024  *	x1, etc.  Another form could map individual state columns to machine
4546877Sda73024  *	register names.
4556877Sda73024  */
4566877Sda73024 
4576877Sda73024 #if defined(ARRAYS)
4586877Sda73024 #define	s(x, c) x[c]
4596877Sda73024 #else
4606877Sda73024 #define	s(x, c) x##c
4616877Sda73024 #endif
4626877Sda73024 
4636877Sda73024 /*
4646877Sda73024  *  This implementation provides subroutines for encryption, decryption
4656877Sda73024  *	and for setting the three key lengths (separately) for encryption
4666877Sda73024  *	and decryption. Since not all functions are needed, masks are set
4676877Sda73024  *	up here to determine which will be implemented in C
4686877Sda73024  */
4696877Sda73024 
4706877Sda73024 #if !defined(AES_ENCRYPT)
4716877Sda73024 #define	EFUNCS_IN_C   0
4726877Sda73024 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
4736877Sda73024 	defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
4746877Sda73024 #define	EFUNCS_IN_C   ENC_KEYING_IN_C
4756877Sda73024 #elif !defined(ASM_X86_V2)
4766877Sda73024 #define	EFUNCS_IN_C   (ENCRYPTION_IN_C | ENC_KEYING_IN_C)
4776877Sda73024 #else
4786877Sda73024 #define	EFUNCS_IN_C   0
4796877Sda73024 #endif
4806877Sda73024 
4816877Sda73024 #if !defined(AES_DECRYPT)
4826877Sda73024 #define	DFUNCS_IN_C   0
4836877Sda73024 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
4846877Sda73024 	defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
4856877Sda73024 #define	DFUNCS_IN_C   DEC_KEYING_IN_C
4866877Sda73024 #elif !defined(ASM_X86_V2)
4876877Sda73024 #define	DFUNCS_IN_C   (DECRYPTION_IN_C | DEC_KEYING_IN_C)
4886877Sda73024 #else
4896877Sda73024 #define	DFUNCS_IN_C   0
4906877Sda73024 #endif
4916877Sda73024 
4926877Sda73024 #define	FUNCS_IN_C  (EFUNCS_IN_C | DFUNCS_IN_C)
4936877Sda73024 
4946877Sda73024 /* END OF CONFIGURATION OPTIONS */
4956877Sda73024 
4966877Sda73024 /* Disable or report errors on some combinations of options */
4976877Sda73024 
4986877Sda73024 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
4996877Sda73024 #undef  LAST_ENC_ROUND
5006877Sda73024 #define	LAST_ENC_ROUND  NO_TABLES
5016877Sda73024 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
5026877Sda73024 #undef  LAST_ENC_ROUND
5036877Sda73024 #define	LAST_ENC_ROUND  ONE_TABLE
5046877Sda73024 #endif
5056877Sda73024 
5066877Sda73024 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
5076877Sda73024 #undef  ENC_UNROLL
5086877Sda73024 #define	ENC_UNROLL  NONE
5096877Sda73024 #endif
5106877Sda73024 
5116877Sda73024 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
5126877Sda73024 #undef  LAST_DEC_ROUND
5136877Sda73024 #define	LAST_DEC_ROUND  NO_TABLES
5146877Sda73024 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
5156877Sda73024 #undef  LAST_DEC_ROUND
5166877Sda73024 #define	LAST_DEC_ROUND  ONE_TABLE
5176877Sda73024 #endif
5186877Sda73024 
5196877Sda73024 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
5206877Sda73024 #undef  DEC_UNROLL
5216877Sda73024 #define	DEC_UNROLL  NONE
5226877Sda73024 #endif
5236877Sda73024 
/*
 * aes_sw32(x): reverses the byte order of a 32-bit word.
 *
 * htonl() only swaps bytes when the machine itself is little-endian, so
 * its use is selected by PLATFORM_BYTE_ORDER (the byte order of the
 * processor) rather than by the byte order chosen for the algorithm.
 * Otherwise bswap32 or bswap_32 is used when one of them is available as
 * a macro, with a portable rotate-and-mask fallback built on brot().
 */
524*7421SDaniel.Anderson@Sun.COM #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
525*7421SDaniel.Anderson@Sun.COM #define	aes_sw32	htonl
526*7421SDaniel.Anderson@Sun.COM #elif defined(bswap32)
5276877Sda73024 #define	aes_sw32	bswap32
5286877Sda73024 #elif defined(bswap_32)
5296877Sda73024 #define	aes_sw32	bswap_32
5306877Sda73024 #else
531*7421SDaniel.Anderson@Sun.COM #define	brot(x, n)  (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
5326877Sda73024 #define	aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))
5336877Sda73024 #endif
5346877Sda73024 
535*7421SDaniel.Anderson@Sun.COM 
5366877Sda73024 /*
537*7421SDaniel.Anderson@Sun.COM  *	upr(x, n):  rotates bytes within words by n positions, moving bytes to
5386877Sda73024  *		higher index positions with wrap around into low positions
5396877Sda73024  *	ups(x, n):  moves bytes by n positions to higher index positions in
5406877Sda73024  *		words but without wrap around
5416877Sda73024  *	bval(x, n): extracts a byte from a word
5426877Sda73024  *
5436877Sda73024  *	WARNING:   The definitions given here are intended only for use with
5446877Sda73024  *		unsigned variables and with shift counts that are compile
5456877Sda73024  *		time constants
5466877Sda73024  */
5476877Sda73024 
/*
 * bytes2word(b0, b1, b2, b3): packs four bytes into one 32-bit word.  In
 * little-endian order b0 becomes the least significant byte of the word;
 * in big-endian order b0 becomes the most significant byte.
 */
5486877Sda73024 #if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
5496877Sda73024 #define	upr(x, n)	(((uint32_t)(x) << (8 * (n))) | \
5506877Sda73024 			((uint32_t)(x) >> (32 - 8 * (n))))
5516877Sda73024 #define	ups(x, n)	((uint32_t)(x) << (8 * (n)))
5526877Sda73024 #define	bval(x, n)	to_byte((x) >> (8 * (n)))
5536877Sda73024 #define	bytes2word(b0, b1, b2, b3)  \
5546877Sda73024 		(((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \
5556877Sda73024 		((uint32_t)(b1) << 8) | (b0))
5566877Sda73024 #endif
5576877Sda73024 
5586877Sda73024 #if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)
5596877Sda73024 #define	upr(x, n)	(((uint32_t)(x) >> (8 * (n))) | \
5606877Sda73024 			((uint32_t)(x) << (32 - 8 * (n))))
5616877Sda73024 #define	ups(x, n)	((uint32_t)(x) >> (8 * (n)))
5626877Sda73024 #define	bval(x, n)	to_byte((x) >> (24 - 8 * (n)))
5636877Sda73024 #define	bytes2word(b0, b1, b2, b3)  \
5646877Sda73024 		(((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \
5656877Sda73024 		((uint32_t)(b2) << 8) | (b3))
5666877Sda73024 #endif
5676877Sda73024 
/*
 * word_in(x, c):     reads 32-bit word number c from the buffer x
 * word_out(x, c, v): stores the value v as 32-bit word number c of x
 *
 * With SAFE_IO the buffer is accessed one byte at a time through
 * bytes2word() and bval(), so no 32-bit loads or stores are made on x.
 * Otherwise x is accessed through a uint32_t pointer, with aes_sw32()
 * applied when ALGORITHM_BYTE_ORDER differs from PLATFORM_BYTE_ORDER.
 *
 * The word index c is parenthesized in every variant so that an
 * expression such as i + 1 can be passed safely.  Note that word_out()
 * expands to a brace block under SAFE_IO and to an expression otherwise.
 */
5686877Sda73024 #if defined(SAFE_IO)
5696877Sda73024 #define	word_in(x, c)	bytes2word(((const uint8_t *)(x) + 4 * (c))[0], \
5706877Sda73024 				((const uint8_t *)(x) + 4 * (c))[1], \
5716877Sda73024 				((const uint8_t *)(x) + 4 * (c))[2], \
5726877Sda73024 				((const uint8_t *)(x) + 4 * (c))[3])
5736877Sda73024 #define	word_out(x, c, v) { ((uint8_t *)(x) + 4 * (c))[0] = bval(v, 0); \
5746877Sda73024 			((uint8_t *)(x) + 4 * (c))[1] = bval(v, 1); \
5756877Sda73024 			((uint8_t *)(x) + 4 * (c))[2] = bval(v, 2); \
5766877Sda73024 			((uint8_t *)(x) + 4 * (c))[3] = bval(v, 3); }
5776877Sda73024 #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
5786877Sda73024 #define	word_in(x, c)	(*((uint32_t *)(x) + (c)))
5796877Sda73024 #define	word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))
5806877Sda73024 #else
5816877Sda73024 #define	word_in(x, c)	aes_sw32(*((uint32_t *)(x) + (c)))
5826877Sda73024 #define	word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))
5836877Sda73024 #endif
5846877Sda73024 
5856877Sda73024 /* the finite field modular polynomial and elements */
5866877Sda73024 
5876877Sda73024 #define	WPOLY   0x011b
5886877Sda73024 #define	BPOLY	0x1b
5896877Sda73024 
5906877Sda73024 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
5916877Sda73024 
/*
 * m1 selects the top bit of each of the four bytes in a word and m2 the
 * low seven bits of each byte.  gf_mulx() shifts the low seven bits of
 * every byte left by one and, for each byte whose top bit was set, XORs
 * BPOLY into that byte: ((x) & m1) >> 7 leaves a 1 in the bottom bit of
 * each such byte, and multiplying by BPOLY turns each of those 1s into
 * the value of BPOLY.
 */
5926877Sda73024 #define	m1  0x80808080
5936877Sda73024 #define	m2  0x7f7f7f7f
5946877Sda73024 #define	gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
5956877Sda73024 
5966877Sda73024 /*
5976877Sda73024  * The following defines provide alternative definitions of gf_mulx that might
5986877Sda73024  * give improved performance if a fast 32-bit multiply is not available. Note
5996877Sda73024  * that a temporary variable u needs to be defined where gf_mulx is used.
6006877Sda73024  *
6016877Sda73024  * #define	gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \
6026877Sda73024  *			((u >> 3) | (u >> 6))
6036877Sda73024  * #define	m4  (0x01010101 * BPOLY)
6046877Sda73024  * #define	gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \
6056877Sda73024  *			& m4)
6066877Sda73024  */
6076877Sda73024 
6086877Sda73024 /* Work out which tables are needed for the different options   */
6096877Sda73024 
/*
 * When ASM_X86_V1C is selected, force FOUR_TABLES for every round type
 * and for the key schedule, replacing any earlier setting.  KEY_SCHED is
 * defined unconditionally, in the same way as the four round settings, so
 * that it cannot be left undefined here.
 */
6106877Sda73024 #if defined(ASM_X86_V1C)
6116877Sda73024 #if defined(ENC_ROUND)
6126877Sda73024 #undef  ENC_ROUND
6136877Sda73024 #endif
6146877Sda73024 #define	ENC_ROUND   FOUR_TABLES
6156877Sda73024 #if defined(LAST_ENC_ROUND)
6166877Sda73024 #undef  LAST_ENC_ROUND
6176877Sda73024 #endif
6186877Sda73024 #define	LAST_ENC_ROUND  FOUR_TABLES
6196877Sda73024 #if defined(DEC_ROUND)
6206877Sda73024 #undef  DEC_ROUND
6216877Sda73024 #endif
6226877Sda73024 #define	DEC_ROUND   FOUR_TABLES
6236877Sda73024 #if defined(LAST_DEC_ROUND)
6246877Sda73024 #undef  LAST_DEC_ROUND
6256877Sda73024 #endif
6266877Sda73024 #define	LAST_DEC_ROUND  FOUR_TABLES
6276877Sda73024 #if defined(KEY_SCHED)
6286877Sda73024 #undef  KEY_SCHED
6296877Sda73024 #endif
6306877Sda73024 #define	KEY_SCHED   FOUR_TABLES
6316877Sda73024 #endif
6326877Sda73024 
6336877Sda73024 #if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C)
6346877Sda73024 #if ENC_ROUND == ONE_TABLE
6356877Sda73024 #define	FT1_SET
6366877Sda73024 #elif ENC_ROUND == FOUR_TABLES
6376877Sda73024 #define	FT4_SET
6386877Sda73024 #else
6396877Sda73024 #define	SBX_SET
6406877Sda73024 #endif
6416877Sda73024 #if LAST_ENC_ROUND == ONE_TABLE
6426877Sda73024 #define	FL1_SET
6436877Sda73024 #elif LAST_ENC_ROUND == FOUR_TABLES
6446877Sda73024 #define	FL4_SET
6456877Sda73024 #elif !defined(SBX_SET)
6466877Sda73024 #define	SBX_SET
6476877Sda73024 #endif
6486877Sda73024 #endif
6496877Sda73024 
/*
 * Decryption tables.  This section is active when FUNCS_IN_C has the
 * DECRYPTION_IN_C bit(s) set or when ASM_X86_V1C is defined.  DEC_ROUND
 * selects one of IT1_SET, IT4_SET or ISB_SET; LAST_DEC_ROUND selects one of
 * IL1_SET, IL4_SET or ISB_SET.
 */
#if defined(ASM_X86_V1C) || (FUNCS_IN_C & DECRYPTION_IN_C)

#if DEC_ROUND == ONE_TABLE
#define	IT1_SET
#elif DEC_ROUND == FOUR_TABLES
#define	IT4_SET
#else
#define	ISB_SET
#endif	/* DEC_ROUND */

#if LAST_DEC_ROUND == ONE_TABLE
#define	IL1_SET
#elif LAST_DEC_ROUND == FOUR_TABLES
#define	IL4_SET
#else
#ifndef ISB_SET		/* may already have been selected by DEC_ROUND */
#define	ISB_SET
#endif
#endif	/* LAST_DEC_ROUND */

#endif	/* decryption tables */
6666877Sda73024 
6676877Sda73024 
/*
 * Key schedule tables.  Nothing is selected here when REDUCE_CODE_SIZE is
 * defined together with ASM_X86_V2 or ASM_X86_V2C.
 */
#if !defined(REDUCE_CODE_SIZE) || \
	!(defined(ASM_X86_V2) || defined(ASM_X86_V2C))

/*
 * Either keying direction in C: KEY_SCHED selects LS1_SET or LS4_SET, but
 * only when the FL table tested below has not already been selected; any
 * other KEY_SCHED value selects SBX_SET.
 */
#if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)
#if KEY_SCHED == ONE_TABLE
#if !(defined(FL1_SET) || defined(FL4_SET))
#define	LS1_SET
#endif
#elif KEY_SCHED == FOUR_TABLES
#ifndef FL4_SET
#define	LS4_SET
#endif
#else
#ifndef SBX_SET
#define	SBX_SET
#endif
#endif	/* KEY_SCHED */
#endif	/* ENC_KEYING_IN_C || DEC_KEYING_IN_C */

/* Decryption keying in C: KEY_SCHED selects IM1_SET, IM4_SET or SBX_SET. */
#if FUNCS_IN_C & DEC_KEYING_IN_C
#if KEY_SCHED == ONE_TABLE
#define	IM1_SET
#elif KEY_SCHED == FOUR_TABLES
#define	IM4_SET
#else
#ifndef SBX_SET
#define	SBX_SET
#endif
#endif	/* KEY_SCHED */
#endif	/* DEC_KEYING_IN_C */

#endif	/* !(REDUCE_CODE_SIZE && (ASM_X86_V2 || ASM_X86_V2C)) */
6936877Sda73024 
6946877Sda73024 /* generic definitions of Rijndael macros that use tables */
6956877Sda73024 
/*
 * no_table: assemble a word from four lookups in the table 'tbl'.  For each
 * byte position p (0..3) the index is bval(vfn(w, p, n), rfn(p, n)); the
 * four looked-up values are handed to bytes2word() in position order.
 */
#define	no_table(w, tbl, vfn, rfn, n) \
	bytes2word(tbl[bval(vfn(w, 0, n), rfn(0, n))], \
	tbl[bval(vfn(w, 1, n), rfn(1, n))], \
	tbl[bval(vfn(w, 2, n), rfn(2, n))], \
	tbl[bval(vfn(w, 3, n), rfn(3, n))])
7016877Sda73024 
/*
 * one_table: XOR together four lookups in the single table 'tbl'.  The
 * lookup for byte position 0 is used as is; the lookups for positions 1, 2
 * and 3 are first passed through rot(value, position).  Each index is
 * bval(vfn(w, p, n), rfn(p, n)).
 */
#define	one_table(w, rot, tbl, vfn, rfn, n) \
	(tbl[bval(vfn(w, 0, n), rfn(0, n))] ^ \
	rot(tbl[bval(vfn(w, 1, n), rfn(1, n))], 1) ^ \
	rot(tbl[bval(vfn(w, 2, n), rfn(2, n))], 2) ^ \
	rot(tbl[bval(vfn(w, 3, n), rfn(3, n))], 3))
7076877Sda73024 
/*
 * four_tables: XOR together one lookup from each of tbl[0] .. tbl[3].  The
 * index into tbl[p] is bval(vfn(w, p, n), rfn(p, n)).
 */
#define	four_tables(w, tbl, vfn, rfn, n) \
	(tbl[0][bval(vfn(w, 0, n), rfn(0, n))] ^ \
	tbl[1][bval(vfn(w, 1, n), rfn(1, n))] ^ \
	tbl[2][bval(vfn(w, 2, n), rfn(2, n))] ^ \
	tbl[3][bval(vfn(w, 3, n), rfn(3, n))])
7136877Sda73024 
/*
 * Selector helpers passed to the table macros as their vf/rf arguments:
 *   vf1(x, r, c)	yields the word x unchanged (r and c are ignored);
 *   rf1(r, c)		yields byte position r unchanged (c is ignored);
 *   rf2(r, c)		yields (8 + r - c) & 3, i.e. r - c reduced modulo 4.
 * Every argument is parenthesised in the expansion so that an expression
 * argument, e.g. rf2(r, n - 1), is evaluated with the intended precedence.
 */
#define	vf1(x, r, c)	(x)
#define	rf1(r, c)	(r)
#define	rf2(r, c)	((8 + (r) - (c)) & 3)
7176877Sda73024 
7186877Sda73024 /*
7196877Sda73024  * Perform forward and inverse column mix operation on four bytes in long word
7206877Sda73024  * x in parallel. NOTE: x must be a simple variable, NOT an expression in
7216877Sda73024  * these macros.
7226877Sda73024  */
7236877Sda73024 
7246877Sda73024 #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
7256877Sda73024 	defined(ASM_X86_V2C)))
7266877Sda73024 
/*
 * fwd_mcol(x): forward column mix of the four bytes held in x.  FM4_SET and
 * FM1_SET pick the table-driven forms (both marked as not currently used);
 * otherwise the value is computed with gf_mulx() and upr(), using the
 * scratch variable g2 that dec_fmvars declares.
 */
#ifdef FM4_SET		/* not currently used */
#define	fwd_mcol(x)	four_tables(x, t_use(f, m), vf1, rf1, 0)
#elif defined(FM1_SET)	/* not currently used */
#define	fwd_mcol(x)	one_table(x, upr, t_use(f, m), vf1, rf1, 0)
#else
#define	dec_fmvars	uint32_t g2
#define	fwd_mcol(x) \
	(g2 = gf_mulx(x), \
	g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
#endif	/* FM4_SET / FM1_SET */
7366877Sda73024 
/*
 * inv_mcol(x): inverse column mix of the four bytes held in x.  IM4_SET and
 * IM1_SET pick the table-driven forms; otherwise the value is computed with
 * gf_mulx() and upr(), using the scratch variables g2, g4 and g9 that
 * dec_imvars declares.
 */
#ifdef IM4_SET
#define	inv_mcol(x)	four_tables(x, t_use(i, m), vf1, rf1, 0)
#elif defined(IM1_SET)
#define	inv_mcol(x)	one_table(x, upr, t_use(i, m), vf1, rf1, 0)
#else
#define	dec_imvars	uint32_t g2, g4, g9
#define	inv_mcol(x) \
	(g2 = gf_mulx(x), \
	g4 = gf_mulx(g2), \
	g9 = (x) ^ gf_mulx(g4), \
	g4 ^= g9, \
	(x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
#endif	/* IM4_SET / IM1_SET */
7486877Sda73024 
/*
 * ls_box(x, c): table lookup on the bytes of x with byte positions chosen by
 * rf2(r, c).  The first available table set is used, tested in the order
 * FL4_SET, LS4_SET, FL1_SET, LS1_SET; with none of them defined the lookup
 * goes through no_table() on t_use(s, box).
 */
#ifdef FL4_SET
#define	ls_box(x, c)	four_tables(x, t_use(f, l), vf1, rf2, c)
#elif defined(LS4_SET)
#define	ls_box(x, c)	four_tables(x, t_use(l, s), vf1, rf2, c)
#elif defined(FL1_SET)
#define	ls_box(x, c)	one_table(x, upr, t_use(f, l), vf1, rf2, c)
#elif defined(LS1_SET)
#define	ls_box(x, c)	one_table(x, upr, t_use(l, s), vf1, rf2, c)
#else
#define	ls_box(x, c)	no_table(x, t_use(s, box), vf1, rf2, c)
#endif	/* FL4_SET / LS4_SET / FL1_SET / LS1_SET */
7606877Sda73024 
7616877Sda73024 #endif
7626877Sda73024 
/*
 * An ASM_X86_V1C build with AES_DECRYPT defined always has ISB_SET defined,
 * whether or not an earlier selection already defined it.
 */
#if defined(ASM_X86_V1C) && defined(AES_DECRYPT)
#ifndef ISB_SET
#define	ISB_SET
#endif
#endif	/* ASM_X86_V1C && AES_DECRYPT */
7666877Sda73024 
7676877Sda73024 #ifdef	__cplusplus
7686877Sda73024 }
7696877Sda73024 #endif
7706877Sda73024 
7716877Sda73024 #endif	/* _AESOPT_H */
772