! des_enc.m4
! des_enc.S  (generated from des_enc.m4)
!
! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
!
! Version 1.0. 32-bit version.
!
! June 8, 2000.
!
! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
!		by Andy Polyakov.
!
! January 1, 2003.
!
! Assembler version: Copyright Svend Olaf Mikkelsen.
!
! Original C code: Copyright Eric A. Young.
!
! This code can be freely used by LibDES/SSLeay/OpenSSL users.
!
! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
!
! This version can be redistributed.
!
! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
!
! Global registers 1 to 5 are used. This is the same as done by the
! cc compiler. The UltraSPARC load/store little endian feature is used.
!
! Instruction grouping often refers to one CPU cycle.
!
! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
!
! Assemble through cc:  cc -c -xarch=v8plusa -o des_enc.o des_enc.S
!
! Performance improvement according to './apps/openssl speed des'
!
!	32-bit build:
!		23%  faster than cc-5.2 -xarch=v8plus -xO5
!		115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
!	64-bit build:
!		50%  faster than cc-5.2 -xarch=v9 -xO5
!		100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
!
.ident "des_enc.m4 2.1"

! Build configuration.
!
! The 64-bit ABI is selected when the preprocessor reports a 64-bit
! SPARC target, either through Sun C or through GCC.

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64  /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64  /* They've said -m64 at command line */
#endif

! Per-ABI constants. In the 64-bit case %g2 and %g3 are declared as
! scratch registers and the UltraSPARC code paths are forced on.
! NOTE(review): judging by their names the constants are the frame
! size, the stack bias, the pointer load/store mnemonics and the
! argument slot layout. Their uses lie outside this part of the
! file - confirm there.

#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
# define	LDPTR	ldx
# define	STPTR	stx
# define	ARG0	128
# define	ARGSZ	8
# ifndef OPENSSL_SYSNAME_ULTRASPARC
# define OPENSSL_SYSNAME_ULTRASPARC
# endif
#else
# define	FRAME	-96
# define	BIAS	0
# define	LDPTR	ld
# define	STPTR	st
# define	ARG0	68
# define	ARGSZ	4
#endif

#define LOOPS 7

! Symbolic register names used by the macros.
! NOTE(review): the two highest local aliases are crossed, the alias
! numbered 7 names %l6 and the alias numbered 6 names %l7 - confirm
! that this is intended before relying on the numbering.

#define global0 %g0
#define global1 %g1
#define global2 %g2
#define global3 %g3
#define global4 %g4
#define global5 %g5

#define local0 %l0
#define local1 %l1
#define local2 %l2
#define local3 %l3
#define local4 %l4
#define local5 %l5
#define local7 %l6
#define local6 %l7

#define in0 %i0
#define in1 %i1
#define in2 %i2
#define in3 %i3
#define in4 %i4
#define in5 %i5
#define in6 %i6
#define in7 %i7

#define out0 %o0
#define out1 %o1
#define out2 %o2
#define out3 %o3
#define out4 %o4
#define out5 %o5
#define out6 %o6
#define out7 %o7

#define stub stb

changequote({,})


! Macro definitions:


! {ip_macro}
!
! The logic used in initial and final permutations is the same as in
! the C code. The permutations are done with a clever technique of
! shifting, xor-ing and and-ing with a mask.
!
! The macro also derives the addresses of sbox 2 to 5 into global 2
! to 5, the address of sbox 6 into local6, and the address of sbox 8
! into out3. All of them are computed from global1, which must already
! hold the address of sbox 1 on entry (the sethi/or pair that would
! load it is commented out below and replaced by nop).
!
! On entry out2 must point to the table of constants that is read at
! offsets 256 to 284 (permutation masks, the loop counter at 280 and
! the 0x0000FC00 mask at 284).
!
! Rotates the halves 3 left to bring the sbox bits in convenient positions.
!
! Loads key first round from address in parameter 5 to out0, out1.
!
! After the original LibDES initial permutation, the resulting left
! is in the variable initially used for right and vice versa. The macro
! implements the possibility to keep the halves in the original registers.
!
! parameter 1  left
! parameter 2  right
! parameter 3  result left (modify in first round)
! parameter 4  result right (use in first round)
! parameter 5  key address
! parameter 6  1/2 for include encryption/decryption
! parameter 7  1 for move in1 to in3
! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
! parameter 9  1 for load ks3 and ks2 to in4 and in3

define(ip_macro, {

! {ip_macro}
! $1 $2 $4 $3 $5 $6 $7 $8 $9

	ld	[out2+256], local1        ! mask for the 4-bit step
	srl	$2, 4, local4

	xor	local4, $1, local4
	ifelse($7,1,{mov in1, in3},{nop})

	ld	[out2+260], local2        ! mask for the 16-bit step
	and	local4, local1, local4
	ifelse($8,1,{mov in3, in4},{})
	ifelse($8,2,{mov in4, in3},{})

	ld	[out2+280], out4          ! loop counter
	sll	local4, 4, local1
	xor	$1, local4, $1

	ld	[out2+264], local3        ! mask for the 2-bit step
	srl	$1, 16, local4
	xor	$2, local1, $2

	ifelse($9,1,{LDPTR	KS3, in4},{})
	xor	local4, $2, local4
	nop	!sethi	%hi(DES_SPtrans), global1 ! sbox addr

	ifelse($9,1,{LDPTR	KS2, in3},{})
	and	local4, local2, local4
	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr

	sll	local4, 16, local1
	xor	$2, local4, $2

	srl	$2, 2, local4
	xor	$1, local1, $1

	sethi	%hi(16711680), local5     ! 16711680 is 0x00FF0000
	xor	local4, $1, local4

	and	local4, local3, local4
	or	local5, 255, local5       ! 0x00FF00FF, mask for the 8-bit step

	sll	local4, 2, local2
	xor	$1, local4, $1

	srl	$1, 8, local4
	xor	$2, local2, $2

	xor	local4, $2, local4
	add	global1, 768, global4     ! address sbox 4

	and	local4, local5, local4
	add	global1, 1024, global5    ! address sbox 5

	ld	[out2+272], local7        ! mask for the 1-bit step
	sll	local4, 8, local1
	xor	$2, local4, $2

	srl	$2, 1, local4
	xor	$1, local1, $1

	ld	[$5], out0                ! key 7531
	xor	local4, $1, local4
	add	global1, 256, global2     ! address sbox 2

	ld	[$5+4], out1              ! key 8642
	and	local4, local7, local4
	add	global1, 512, global3     ! address sbox 3

	sll	local4, 1, local1
	xor	$1, local4, $1

	sll	$1, 3, local3             ! rotate 3 left, low part
	xor	$2, local1, $2

	sll	$2, 3, local2             ! rotate 3 left, low part
	add	global1, 1280, local6     ! address sbox 6

	srl	$1, 29, local4            ! rotate 3 left, wrapped bits
	add	global1, 1792, out3       ! address sbox 8

	srl	$2, 29, local1            ! rotate 3 left, wrapped bits
	or	local4, local3, $4

	or	local2, local1, $3

	ifelse($6, 1, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3     ! same value as the or just above
		xor	$4, out0, local1

		call .des_enc.1
		and	local1, 252, local1    ! executed in the delay slot of the call

	},{})

	ifelse($6, 2, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3     ! same value as the or just above
		xor	$4, out0, local1

		call .des_dec.1
		and	local1, 252, local1    ! executed in the delay slot of the call

	},{})
})


! {rounds_macro}
!
! The logic used in the DES rounds is the same as in the C code,
! except that calculations for sbox 1 and sbox 5 begin before
! the previous round is finished.
!
! In each round one half (work) is modified based on key and the
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
!	777777xx555555xx333333xx111111xx
!
!	88xx666666xx444444xx222222xx8888
!
! The bits for each sbox are xor-ed with the key bits for that box.
! The above xx bits are cleared, and the result used for lookup in
! the sbox table.
! Each sbox entry contains the 4 output bits permuted
! into 32 bits according to the P permutation.
!
! In the description of DES, left and right are switched after
! each round, except after last round. In this code the original
! left and right are kept in the same register in all rounds, meaning
! that after the 16 rounds the result for right is in the register
! originally used for left.
!
! parameter 1  first work (left in first round)
! parameter 2  first use (right in first round)
! parameter 3  enc/dec  1/-1
! parameter 4  loop label
! parameter 5  key address register
! parameter 6  optional address for key next encryption/decryption
! parameter 7  not empty for include retl
!
! also compares in2 to 8
!
! Parameter 3 scales every key offset: the key words are read at
! parameter 3 times 8 and times 16 from the key address register, and
! that register is advanced by parameter 3 times 16 per loop pass.

define(rounds_macro, {

! {rounds_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	xor	$2, out0, local1

	ld	[out2+284], local5        ! 0x0000FC00
	ba	$4
	and	local1, 252, local1       ! executed in the delay slot of the branch

	.align 32

$4:
	! local6 is address sbox 6
	! out3   is address sbox 8
	! out4   is loop counter

	ld	[global1+local1], local1
	xor	$2, out1, out1            ! 8642
	xor	$2, out0, out0            ! 7531
	fmovs	%f0, %f0                  ! fmovs onto itself, filler used for alignment

	srl	out1, 4, local0           ! rotate 4 right
	and	out0, local5, local3      ! 3
	fmovs	%f0, %f0

	ld	[$5+$3*8], local7         ! key 7531 next round
	srl	local3, 8, local3         ! 3
	and	local0, 252, local2       ! 2
	fmovs	%f0, %f0

	ld	[global3+local3],local3   ! 3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2  ! 2
	srl	out0, 24, local1          ! 7
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1     ! 7 (and 0xFC)
	srl	out1, 24, local0          ! 8
	and	out1, local5, local4      ! 4

	ldub	[out2+local0], local0     ! 8 (and 0xFC)
	srl	local4, 8, local4         ! 4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4   ! 4
	srl	out1, 16, local2          ! 6
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0      ! 8
	and	local2, 252, local2       ! 6
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2   ! 6
	srl	out0, 16, local3          ! 5
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1    ! 7
	and	local3, 252, local3       ! 5
	xor	$1, local0, $1            ! 8 finished

	ld	[global5+local3],local3   ! 5
	xor	$1, local2, $1            ! 6 finished
	subcc	out4, 1, out4             ! sets the flags tested by bne below

	ld	[$5+$3*8+4], out0         ! key 8642 next round
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	ld	[$5+$3*16+4], out1        ! key 8642 next round again
	and	local2, 252, local2       ! sbox5 next round
! next round
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2  ! 5
	srl	local7, 24, local3        ! 7
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3     ! 7 (and 0xFC)
	srl	out0, 4, local0           ! rotate 4 right
	and	local7, 252, local1       ! 1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4         ! 4
	and	local0, 252, local2       ! 2
	ld	[local5+local3], local3   ! 7

	srl	local0, 16, local5        ! 6
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2  ! 2

	srl	out0, 24, local0
	ld	[$5+$3*16], out0          ! key 7531 next round
	and	local4, 252, local4       ! 4

	and	local5, 252, local5       ! 6
	ld	[global4+local4], local4  ! 4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0       ! 8
	ld	[local6+local5], local5   ! 6
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0     ! 8
	xor	$2, local4, $2            ! 4 finished

	and	local2, 252, local2       ! 3
	ld	[global1+local1], local1  ! 1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2  ! 3
	xor	$2, local0, $2            ! 8 finished
	add	$5, $3*16, $5             ! enc add 16, dec add -16 to key pointer

	ld	[out2+284], local5        ! 0x0000FC00
	xor	$2, out0, local4          ! sbox 1 next round
	xor	$2, local1, $2            ! 1 finished

	xor	$2, local2, $2            ! 3 finished
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bne,pt	%icc, $4
#else
	bne	$4
#endif
	and	local4, 252, local1       ! sbox 1 next round

! two rounds more:

	ld	[global1+local1], local1
	xor	$2, out1, out1
	xor	$2, out0, out0

	srl	out1, 4, local0           ! rotate
	and	out0, local5, local3

	ld	[$5+$3*8], local7         ! key 7531
	srl	local3, 8, local3
	and	local0, 252, local2

	ld	[global3+local3],local3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2
	srl	out0, 24, local1
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1
	srl	out1, 24, local0
	and	out1, local5, local4

	ldub	[out2+local0], local0
	srl	local4, 8, local4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4
	srl	out1, 16, local2
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0
	and	local2, 252, local2
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2
	srl	out0, 16, local3
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1
	and	local3, 252, local3
	xor	$1, local0, $1

	ld	[global5+local3],local3
	xor	$1, local2, $1            ! 6 finished
	cmp	in2, 8

	ifelse($6,{}, {}, {ld	[out2+280], out4})  ! loop counter
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	ld	[$5+$3*8+4], out0
	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	and	local2, 252, local2
! next round (two rounds more)
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2
	srl	local7, 24, local3
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3
	srl	out0, 4, local0           ! rotate
	and	local7, 252, local1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4
	and	local0, 252, local2
	ld	[local5+local3], local3

	srl	local0, 16, local5
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2

	srl	out0, 24, local0
	ifelse($6,{}, {}, {ld	[$6], out0})  ! key next encryption/decryption
	and	local4, 252, local4

	and	local5, 252, local5
	ld	[global4+local4], local4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0
	ld	[local6+local5], local5
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0
	xor	$2, local4, $2

	and	local2, 252, local2
	ld	[global1+local1], local1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2
	srl	$1, 3, local3             ! left half shifted 3 right, for the final permutation
	xor	$2, local0, $2

	ifelse($6,{}, {}, {ld	[$6+4], out1}) ! key next encryption/decryption
	sll	$1, 29, local4            ! left half shifted 29 left, for the final permutation
	xor	$2, local1, $2

	ifelse($7,{}, {}, {retl})
	xor	$2, local2, $2
})


! {fp_macro}
!
!  parameter 1   right (original left)
!  parameter 2   left (original right)
!  parameter 3   1 for optional store to [in0]
!  parameter 4   1 for load input/output address to local5/7
!
!  The final permutation logic switches the halves, meaning that
!  left and right end up in the registers originally used.

define(fp_macro, {

! {fp_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! initially undo the rotate 3 left done after initial permutation
! original left is received shifted 3 right and 29 left in local3/4

	sll	$2, 29, local1
	or	local3, local4, $1        ! rebuild the half from its two shifted parts

	srl	$2, 3, $2
	sethi	%hi(0x55555555), local2   ! mask for the 1-bit step

	or	$2, local1, $2
	or	local2, %lo(0x55555555), local2

	srl	$2, 1, local3
	sethi	%hi(0x00ff00ff), local1   ! mask for the 8-bit step
	xor	local3, $1, local3
	or	local1, %lo(0x00ff00ff), local1
	and	local3, local2, local3
	sethi	%hi(0x33333333), local4   ! mask for the 2-bit step
	sll	local3, 1, local2

	xor	$1, local3, $1

	srl	$1, 8, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x33333333), local4
	and	local3, local1, local3
	sethi	%hi(0x0000ffff), local1   ! mask for the 16-bit step
	sll	local3, 8, local2

	xor	$2, local3, $2

	srl	$2, 2, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	or	local1, %lo(0x0000ffff), local1
	and	local3, local4, local3
	sethi	%hi(0x0f0f0f0f), local4   ! mask for the 4-bit step
	sll	local3, 2, local2

	ifelse($4,1, {LDPTR INPUT, local5})
	xor	$1, local3, $1

	ifelse($4,1, {LDPTR OUTPUT, local7})
	srl	$1, 16, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x0f0f0f0f), local4
	and	local3, local1, local3
	sll	local3, 16, local2

	xor	$2, local3, local1        ! second half continues in local1 from here

	srl	local1, 4, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	and	local3, local4, local3
	sll	local3, 4, local2

	xor	$1, local3, $1

	! optional store:

	ifelse($3,1, {st $1, [in0]})

	xor	local1, local2, $2        ! final value of the second half

	ifelse($3,1, {st $2, [in0+4]})

})


! {fp_ip_macro}
!
! Does initial permutation for next block mixed with
! final permutation for current block.
!
! parameter 1   original left
! parameter 2   original right
! parameter 3   left ip
! parameter 4   right ip
! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
!                2: mov in4 to in3
!
! also adds -8 to length in2 and loads loop counter to out4

define(fp_ip_macro, {

! {fp_ip_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	define({temp1},{out4})
	define({temp2},{local3})

	define({ip1},{local1})
	define({ip2},{local2})
	define({ip4},{local4})
	define({ip5},{local5})

! $1 in local3, local4

	ld	[out2+256], ip1
	sll	out5, 29, temp1
	or	local3, local4, $1

	srl	out5, 3, $2
	ifelse($5,2,{mov in4, in3})

	ld	[out2+272], ip5
	srl	$4, 4, local0
	or	$2, temp1, $2

	srl	$2, 1, temp1
	xor	temp1, $1, temp1

	and	temp1, ip5, temp1
	xor	local0, $3, local0

	sll	temp1, 1, temp2
	xor	$1, temp1, $1

	and	local0, ip1, local0
	add	in2, -8, in2              ! 8 bytes of the length are consumed

	sll	local0, 4, local7
	xor	$3, local0, $3

	ld	[out2+268], ip4
	srl	$1, 8, temp1
	xor	$2, temp2, $2
	ld	[out2+260], ip2
	srl	$3, 16, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip4, temp1
	and	local0, ip2, local0
	sll	temp1, 8, temp2
	xor	$2, temp1, $2
	sll	local0, 16, local7
	xor	$4, local0, $4

	srl	$2, 2, temp1
	xor	$1, temp2, $1

	ld	[out2+264], temp2         ! ip3
	srl	$4, 2, local0
	xor	$3, local7, $3
	xor	temp1, $1, temp1
	xor	local0, $3, local0
	and	temp1, temp2, temp1
	and	local0, temp2, local0
	sll	temp1, 2, temp2
	xor	$1, temp1, $1
	sll	local0, 2, local7
	xor	$3, local0, $3

	srl	$1, 16, temp1
	xor	$2, temp2, $2
	srl	$3, 8, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip2, temp1
	and	local0, ip4, local0
	sll	temp1, 16, temp2
	xor	$2, temp1, local4
	sll	local0, 8, local7
	xor	$4, local0, $4

	srl	$4, 1, local0
	xor	$3, local7, $3

	srl	local4, 4, temp1
	xor	local0, $3, local0

	xor	$1, temp2, $1
	and	local0, ip5, local0

	sll	local0, 1, local7
	xor	temp1, $1, temp1

	xor	$3, local0, $3
	xor	$4, local7, $4

	sll	$3, 3, local5
	and	temp1, ip1, temp1

	sll	temp1, 4, temp2
	xor	$1, temp1, $1

	ifelse($5,1,{LDPTR	KS2, in4})
	sll	$4, 3, local2
	xor	local4, temp2, $2

	! reload since used as temporary:

	ld	[out2+280], out4          ! loop counter

	srl	$3, 29, local0
	ifelse($5,1,{add in4, 120, in4})

	ifelse($5,1,{LDPTR	KS1, in3})
	srl	$4, 29, local7

	or	local0, local5, $4
	or	local2, local7, $3

})



! {load_little_endian}
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label
!
! When the UltraSPARC paths are enabled and the address is a multiple
! of 4, both words are read with little endian loads. Otherwise they
! are assembled byte by byte, starting at the label in parameter 5.
! The same label with an a appended marks the common exit.

define(load_little_endian, {

! {load_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0            ! test the two low address bits
	bne,pn	%icc, $5                  ! not word aligned, go bytewise
	nop

	lda	[$1] 0x88, $2             ! ASI 0x88, little endian word load
	add	$1, 4, $4

	ba,pt	%icc, $5a
	lda	[$4] 0x88, $3             ! second word, loaded in the delay slot
#endif

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2


	ldub	[$1+3+4], $3

	ldub	[$1+2+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4], $4
	sll	$3, 8, $3
! tail of the bytewise path, finishing the right word
	or	$3, $4, $3

	ldub	[$1+0+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_little_endian_inc}
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label
!
! adds 8 to address
!
! When the UltraSPARC paths are enabled and the address is a multiple
! of 4, both words are read with little endian loads. Otherwise they
! are assembled byte by byte, starting at the label in parameter 5.
! The same label with an a appended marks the common exit.

define(load_little_endian_inc, {

! {load_little_endian_inc}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0            ! test the two low address bits
	bne,pn	%icc, $5                  ! not word aligned, go bytewise
	nop

	lda	[$1] 0x88, $2             ! ASI 0x88, little endian word load
	add	$1, 4, $1

	lda	[$1] 0x88, $3
	ba,pt	%icc, $5a
	add	$1, 4, $1                 ! second half of the increment, in the delay slot
#endif

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+3+4], $3
	add	$1, 8, $1                 ! address advanced here, later offsets compensate with -8

	ldub	[$1+2+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

889*2139Sjp161948 ldub [$1+0+4-8], $4 890*2139Sjp161948 sll $3, 8, $3 891*2139Sjp161948 or $3, $4, $3 892*2139Sjp161948$5a: 893*2139Sjp161948 894*2139Sjp161948}) 895*2139Sjp161948 896*2139Sjp161948 897*2139Sjp161948! {load_n_bytes} 898*2139Sjp161948! 899*2139Sjp161948! Loads 1 to 7 bytes little endian 900*2139Sjp161948! Remaining bytes are zeroed. 901*2139Sjp161948! 902*2139Sjp161948! parameter 1 address 903*2139Sjp161948! parameter 2 length 904*2139Sjp161948! parameter 3 destination register left 905*2139Sjp161948! parameter 4 destination register right 906*2139Sjp161948! parameter 5 temp 907*2139Sjp161948! parameter 6 temp2 908*2139Sjp161948! parameter 7 label 909*2139Sjp161948! parameter 8 return label 910*2139Sjp161948 911*2139Sjp161948define(load_n_bytes, { 912*2139Sjp161948 913*2139Sjp161948! {load_n_bytes} 914*2139Sjp161948! $1 $2 $5 $6 $7 $8 $7 $8 $9 915*2139Sjp161948 916*2139Sjp161948$7.0: call .+8 917*2139Sjp161948 sll $2, 2, $6 918*2139Sjp161948 919*2139Sjp161948 add %o7,$7.jmp.table-$7.0,$5 920*2139Sjp161948 921*2139Sjp161948 add $5, $6, $5 922*2139Sjp161948 mov 0, $4 923*2139Sjp161948 924*2139Sjp161948 ld [$5], $5 925*2139Sjp161948 926*2139Sjp161948 jmp %o7+$5 927*2139Sjp161948 mov 0, $3 928*2139Sjp161948 929*2139Sjp161948$7.7: 930*2139Sjp161948 ldub [$1+6], $5 931*2139Sjp161948 sll $5, 16, $5 932*2139Sjp161948 or $3, $5, $3 933*2139Sjp161948$7.6: 934*2139Sjp161948 ldub [$1+5], $5 935*2139Sjp161948 sll $5, 8, $5 936*2139Sjp161948 or $3, $5, $3 937*2139Sjp161948$7.5: 938*2139Sjp161948 ldub [$1+4], $5 939*2139Sjp161948 or $3, $5, $3 940*2139Sjp161948$7.4: 941*2139Sjp161948 ldub [$1+3], $5 942*2139Sjp161948 sll $5, 24, $5 943*2139Sjp161948 or $4, $5, $4 944*2139Sjp161948$7.3: 945*2139Sjp161948 ldub [$1+2], $5 946*2139Sjp161948 sll $5, 16, $5 947*2139Sjp161948 or $4, $5, $4 948*2139Sjp161948$7.2: 949*2139Sjp161948 ldub [$1+1], $5 950*2139Sjp161948 sll $5, 8, $5 951*2139Sjp161948 or $4, $5, $4 952*2139Sjp161948$7.1: 953*2139Sjp161948 ldub [$1+0], $5 
954*2139Sjp161948 ba $8 955*2139Sjp161948 or $4, $5, $4 956*2139Sjp161948 957*2139Sjp161948 .align 4 958*2139Sjp161948 959*2139Sjp161948$7.jmp.table: 960*2139Sjp161948 .word 0 961*2139Sjp161948 .word $7.1-$7.0 962*2139Sjp161948 .word $7.2-$7.0 963*2139Sjp161948 .word $7.3-$7.0 964*2139Sjp161948 .word $7.4-$7.0 965*2139Sjp161948 .word $7.5-$7.0 966*2139Sjp161948 .word $7.6-$7.0 967*2139Sjp161948 .word $7.7-$7.0 968*2139Sjp161948}) 969*2139Sjp161948 970*2139Sjp161948 971*2139Sjp161948! {store_little_endian} 972*2139Sjp161948! 973*2139Sjp161948! parameter 1 address 974*2139Sjp161948! parameter 2 source left 975*2139Sjp161948! parameter 3 source right 976*2139Sjp161948! parameter 4 temporary; parameter 5 label 977*2139Sjp161948 978*2139Sjp161948define(store_little_endian, { 979*2139Sjp161948 980*2139Sjp161948! {store_little_endian} 981*2139Sjp161948! $1 $2 $3 $4 $5 $6 $7 $8 $9 982*2139Sjp161948 983*2139Sjp161948 ! rightmost in register to first in memory 984*2139Sjp161948 985*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 986*2139Sjp161948 andcc $1, 3, global0 987*2139Sjp161948 bne,pn %icc, $5 988*2139Sjp161948 nop 989*2139Sjp161948 990*2139Sjp161948 sta $2, [$1] 0x88 991*2139Sjp161948 add $1, 4, $4 992*2139Sjp161948 993*2139Sjp161948 ba,pt %icc, $5a 994*2139Sjp161948 sta $3, [$4] 0x88 995*2139Sjp161948#endif 996*2139Sjp161948 997*2139Sjp161948$5: 998*2139Sjp161948 and $2, 255, $4 999*2139Sjp161948 stub $4, [$1+0] 1000*2139Sjp161948 1001*2139Sjp161948 srl $2, 8, $4 1002*2139Sjp161948 and $4, 255, $4 1003*2139Sjp161948 stub $4, [$1+1] 1004*2139Sjp161948 1005*2139Sjp161948 srl $2, 16, $4 1006*2139Sjp161948 and $4, 255, $4 1007*2139Sjp161948 stub $4, [$1+2] 1008*2139Sjp161948 1009*2139Sjp161948 srl $2, 24, $4 1010*2139Sjp161948 stub $4, [$1+3] 1011*2139Sjp161948 1012*2139Sjp161948 1013*2139Sjp161948 and $3, 255, $4 1014*2139Sjp161948 stub $4, [$1+0+4] 1015*2139Sjp161948 1016*2139Sjp161948 srl $3, 8, $4 1017*2139Sjp161948 and $4, 255, $4 1018*2139Sjp161948 stub $4, [$1+1+4] 1019*2139Sjp161948 
1020*2139Sjp161948 srl $3, 16, $4 1021*2139Sjp161948 and $4, 255, $4 1022*2139Sjp161948 stub $4, [$1+2+4] 1023*2139Sjp161948 1024*2139Sjp161948 srl $3, 24, $4 1025*2139Sjp161948 stub $4, [$1+3+4] 1026*2139Sjp161948 1027*2139Sjp161948$5a: 1028*2139Sjp161948 1029*2139Sjp161948}) 1030*2139Sjp161948 1031*2139Sjp161948 1032*2139Sjp161948! {store_n_bytes} 1033*2139Sjp161948! 1034*2139Sjp161948! Stores 1 to 7 bytes little endian 1035*2139Sjp161948! 1036*2139Sjp161948! parameter 1 address 1037*2139Sjp161948! parameter 2 length 1038*2139Sjp161948! parameter 3 source register left 1039*2139Sjp161948! parameter 4 source register right 1040*2139Sjp161948! parameter 5 temp 1041*2139Sjp161948! parameter 6 temp2 1042*2139Sjp161948! parameter 7 label 1043*2139Sjp161948! parameter 8 return label 1044*2139Sjp161948 1045*2139Sjp161948define(store_n_bytes, { 1046*2139Sjp161948 1047*2139Sjp161948! {store_n_bytes} 1048*2139Sjp161948! $1 $2 $5 $6 $7 $8 $7 $8 $9 1049*2139Sjp161948 1050*2139Sjp161948$7.0: call .+8 1051*2139Sjp161948 sll $2, 2, $6 1052*2139Sjp161948 1053*2139Sjp161948 add %o7,$7.jmp.table-$7.0,$5 1054*2139Sjp161948 1055*2139Sjp161948 add $5, $6, $5 1056*2139Sjp161948 1057*2139Sjp161948 ld [$5], $5 1058*2139Sjp161948 1059*2139Sjp161948 jmp %o7+$5 1060*2139Sjp161948 nop 1061*2139Sjp161948 1062*2139Sjp161948$7.7: 1063*2139Sjp161948 srl $3, 16, $5 1064*2139Sjp161948 and $5, 0xff, $5 1065*2139Sjp161948 stub $5, [$1+6] 1066*2139Sjp161948$7.6: 1067*2139Sjp161948 srl $3, 8, $5 1068*2139Sjp161948 and $5, 0xff, $5 1069*2139Sjp161948 stub $5, [$1+5] 1070*2139Sjp161948$7.5: 1071*2139Sjp161948 and $3, 0xff, $5 1072*2139Sjp161948 stub $5, [$1+4] 1073*2139Sjp161948$7.4: 1074*2139Sjp161948 srl $4, 24, $5 1075*2139Sjp161948 stub $5, [$1+3] 1076*2139Sjp161948$7.3: 1077*2139Sjp161948 srl $4, 16, $5 1078*2139Sjp161948 and $5, 0xff, $5 1079*2139Sjp161948 stub $5, [$1+2] 1080*2139Sjp161948$7.2: 1081*2139Sjp161948 srl $4, 8, $5 1082*2139Sjp161948 and $5, 0xff, $5 1083*2139Sjp161948 stub $5, 
[$1+1] 1084*2139Sjp161948$7.1: 1085*2139Sjp161948 and $4, 0xff, $5 1086*2139Sjp161948 1087*2139Sjp161948 1088*2139Sjp161948 ba $8 1089*2139Sjp161948 stub $5, [$1] 1090*2139Sjp161948 1091*2139Sjp161948 .align 4 1092*2139Sjp161948 1093*2139Sjp161948$7.jmp.table: 1094*2139Sjp161948 1095*2139Sjp161948 .word 0 1096*2139Sjp161948 .word $7.1-$7.0 1097*2139Sjp161948 .word $7.2-$7.0 1098*2139Sjp161948 .word $7.3-$7.0 1099*2139Sjp161948 .word $7.4-$7.0 1100*2139Sjp161948 .word $7.5-$7.0 1101*2139Sjp161948 .word $7.6-$7.0 1102*2139Sjp161948 .word $7.7-$7.0 1103*2139Sjp161948}) 1104*2139Sjp161948 1105*2139Sjp161948 1106*2139Sjp161948define(testvalue,{1}) 1107*2139Sjp161948 1108*2139Sjp161948define(register_init, { 1109*2139Sjp161948 1110*2139Sjp161948! For test purposes: 1111*2139Sjp161948 1112*2139Sjp161948 sethi %hi(testvalue), local0 1113*2139Sjp161948 or local0, %lo(testvalue), local0 1114*2139Sjp161948 1115*2139Sjp161948 ifelse($1,{},{}, {mov local0, $1}) 1116*2139Sjp161948 ifelse($2,{},{}, {mov local0, $2}) 1117*2139Sjp161948 ifelse($3,{},{}, {mov local0, $3}) 1118*2139Sjp161948 ifelse($4,{},{}, {mov local0, $4}) 1119*2139Sjp161948 ifelse($5,{},{}, {mov local0, $5}) 1120*2139Sjp161948 ifelse($6,{},{}, {mov local0, $6}) 1121*2139Sjp161948 ifelse($7,{},{}, {mov local0, $7}) 1122*2139Sjp161948 ifelse($8,{},{}, {mov local0, $8}) 1123*2139Sjp161948 1124*2139Sjp161948 mov local0, local1 1125*2139Sjp161948 mov local0, local2 1126*2139Sjp161948 mov local0, local3 1127*2139Sjp161948 mov local0, local4 1128*2139Sjp161948 mov local0, local5 1129*2139Sjp161948 mov local0, local7 1130*2139Sjp161948 mov local0, local6 1131*2139Sjp161948 mov local0, out0 1132*2139Sjp161948 mov local0, out1 1133*2139Sjp161948 mov local0, out2 1134*2139Sjp161948 mov local0, out3 1135*2139Sjp161948 mov local0, out4 1136*2139Sjp161948 mov local0, out5 1137*2139Sjp161948 mov local0, global1 1138*2139Sjp161948 mov local0, global2 1139*2139Sjp161948 mov local0, global3 1140*2139Sjp161948 mov local0, global4 
1141*2139Sjp161948 mov local0, global5 1142*2139Sjp161948 1143*2139Sjp161948}) 1144*2139Sjp161948 1145*2139Sjp161948.section ".text" 1146*2139Sjp161948 1147*2139Sjp161948 .align 32 1148*2139Sjp161948 1149*2139Sjp161948.des_enc: 1150*2139Sjp161948 1151*2139Sjp161948 ! key address in3 1152*2139Sjp161948 ! loads key next encryption/decryption first round from [in4] 1153*2139Sjp161948 1154*2139Sjp161948 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) 1155*2139Sjp161948 1156*2139Sjp161948 1157*2139Sjp161948 .align 32 1158*2139Sjp161948 1159*2139Sjp161948.des_dec: 1160*2139Sjp161948 1161*2139Sjp161948 ! implemented with out5 as first parameter to avoid 1162*2139Sjp161948 ! register exchange in ede modes 1163*2139Sjp161948 1164*2139Sjp161948 ! key address in4 1165*2139Sjp161948 ! loads key next encryption/decryption first round from [in3] 1166*2139Sjp161948 1167*2139Sjp161948 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) 1168*2139Sjp161948 1169*2139Sjp161948 1170*2139Sjp161948 1171*2139Sjp161948! void DES_encrypt1(data, ks, enc) 1172*2139Sjp161948! ******************************* 1173*2139Sjp161948 1174*2139Sjp161948 .align 32 1175*2139Sjp161948 .global DES_encrypt1 1176*2139Sjp161948 .type DES_encrypt1,#function 1177*2139Sjp161948 1178*2139Sjp161948DES_encrypt1: 1179*2139Sjp161948 1180*2139Sjp161948 save %sp, FRAME, %sp 1181*2139Sjp161948 1182*2139Sjp161948 call .PIC.me.up 1183*2139Sjp161948 mov .PIC.me.up-(.-4),out0 1184*2139Sjp161948 1185*2139Sjp161948 ld [in0], in5 ! left 1186*2139Sjp161948 cmp in2, 0 ! enc 1187*2139Sjp161948 1188*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1189*2139Sjp161948 be,pn %icc, .encrypt.dec ! enc/dec 1190*2139Sjp161948#else 1191*2139Sjp161948 be .encrypt.dec 1192*2139Sjp161948#endif 1193*2139Sjp161948 ld [in0+4], out5 ! right 1194*2139Sjp161948 1195*2139Sjp161948 ! parameter 6 1/2 for include encryption/decryption 1196*2139Sjp161948 ! parameter 7 1 for move in1 to in3 1197*2139Sjp161948 ! 
parameter 8 1 for move in3 to in4, 2 for move in4 to in3 1198*2139Sjp161948 1199*2139Sjp161948 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) 1200*2139Sjp161948 1201*2139Sjp161948 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used 1202*2139Sjp161948 1203*2139Sjp161948 fp_macro(in5, out5, 1) ! 1 for store to [in0] 1204*2139Sjp161948 1205*2139Sjp161948 ret 1206*2139Sjp161948 restore 1207*2139Sjp161948 1208*2139Sjp161948.encrypt.dec: 1209*2139Sjp161948 1210*2139Sjp161948 add in1, 120, in3 ! use last subkey for first round 1211*2139Sjp161948 1212*2139Sjp161948 ! parameter 6 1/2 for include encryption/decryption 1213*2139Sjp161948 ! parameter 7 1 for move in1 to in3 1214*2139Sjp161948 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 1215*2139Sjp161948 1216*2139Sjp161948 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4 1217*2139Sjp161948 1218*2139Sjp161948 fp_macro(out5, in5, 1) ! 1 for store to [in0] 1219*2139Sjp161948 1220*2139Sjp161948 ret 1221*2139Sjp161948 restore 1222*2139Sjp161948 1223*2139Sjp161948.DES_encrypt1.end: 1224*2139Sjp161948 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1 1225*2139Sjp161948 1226*2139Sjp161948 1227*2139Sjp161948! void DES_encrypt2(data, ks, enc) 1228*2139Sjp161948!********************************* 1229*2139Sjp161948 1230*2139Sjp161948 ! encrypts/decrypts without initial/final permutation 1231*2139Sjp161948 1232*2139Sjp161948 .align 32 1233*2139Sjp161948 .global DES_encrypt2 1234*2139Sjp161948 .type DES_encrypt2,#function 1235*2139Sjp161948 1236*2139Sjp161948DES_encrypt2: 1237*2139Sjp161948 1238*2139Sjp161948 save %sp, FRAME, %sp 1239*2139Sjp161948 1240*2139Sjp161948 call .PIC.me.up 1241*2139Sjp161948 mov .PIC.me.up-(.-4),out0 1242*2139Sjp161948 1243*2139Sjp161948 ! Set sbox address 1 to 6 and rotate halfs 3 left 1244*2139Sjp161948 ! Errors caught by destest? Yes. Still? *NO* 1245*2139Sjp161948 1246*2139Sjp161948 !sethi %hi(DES_SPtrans), global1 ! 
address sbox 1 1247*2139Sjp161948 1248*2139Sjp161948 !or global1, %lo(DES_SPtrans), global1 ! sbox 1 1249*2139Sjp161948 1250*2139Sjp161948 add global1, 256, global2 ! sbox 2 1251*2139Sjp161948 add global1, 512, global3 ! sbox 3 1252*2139Sjp161948 1253*2139Sjp161948 ld [in0], out5 ! right 1254*2139Sjp161948 add global1, 768, global4 ! sbox 4 1255*2139Sjp161948 add global1, 1024, global5 ! sbox 5 1256*2139Sjp161948 1257*2139Sjp161948 ld [in0+4], in5 ! left 1258*2139Sjp161948 add global1, 1280, local6 ! sbox 6 1259*2139Sjp161948 add global1, 1792, out3 ! sbox 8 1260*2139Sjp161948 1261*2139Sjp161948 ! rotate 1262*2139Sjp161948 1263*2139Sjp161948 sll in5, 3, local5 1264*2139Sjp161948 mov in1, in3 ! key address to in3 1265*2139Sjp161948 1266*2139Sjp161948 sll out5, 3, local7 1267*2139Sjp161948 srl in5, 29, in5 1268*2139Sjp161948 1269*2139Sjp161948 srl out5, 29, out5 1270*2139Sjp161948 add in5, local5, in5 1271*2139Sjp161948 1272*2139Sjp161948 add out5, local7, out5 1273*2139Sjp161948 cmp in2, 0 1274*2139Sjp161948 1275*2139Sjp161948 ! we use our own stackframe 1276*2139Sjp161948 1277*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1278*2139Sjp161948 be,pn %icc, .encrypt2.dec ! decryption 1279*2139Sjp161948#else 1280*2139Sjp161948 be .encrypt2.dec 1281*2139Sjp161948#endif 1282*2139Sjp161948 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] 1283*2139Sjp161948 1284*2139Sjp161948 ld [in3], out0 ! key 7531 first round 1285*2139Sjp161948 mov LOOPS, out4 ! loop counter 1286*2139Sjp161948 1287*2139Sjp161948 ld [in3+4], out1 ! key 8642 first round 1288*2139Sjp161948 sethi %hi(0x0000FC00), local5 1289*2139Sjp161948 1290*2139Sjp161948 call .des_enc 1291*2139Sjp161948 mov in3, in4 1292*2139Sjp161948 1293*2139Sjp161948 ! 
rotate 1294*2139Sjp161948 sll in5, 29, in0 1295*2139Sjp161948 srl in5, 3, in5 1296*2139Sjp161948 sll out5, 29, in1 1297*2139Sjp161948 add in5, in0, in5 1298*2139Sjp161948 srl out5, 3, out5 1299*2139Sjp161948 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 1300*2139Sjp161948 add out5, in1, out5 1301*2139Sjp161948 st in5, [in0] 1302*2139Sjp161948 st out5, [in0+4] 1303*2139Sjp161948 1304*2139Sjp161948 ret 1305*2139Sjp161948 restore 1306*2139Sjp161948 1307*2139Sjp161948 1308*2139Sjp161948.encrypt2.dec: 1309*2139Sjp161948 1310*2139Sjp161948 add in3, 120, in4 1311*2139Sjp161948 1312*2139Sjp161948 ld [in4], out0 ! key 7531 first round 1313*2139Sjp161948 mov LOOPS, out4 ! loop counter 1314*2139Sjp161948 1315*2139Sjp161948 ld [in4+4], out1 ! key 8642 first round 1316*2139Sjp161948 sethi %hi(0x0000FC00), local5 1317*2139Sjp161948 1318*2139Sjp161948 mov in5, local1 ! left expected in out5 1319*2139Sjp161948 mov out5, in5 1320*2139Sjp161948 1321*2139Sjp161948 call .des_dec 1322*2139Sjp161948 mov local1, out5 1323*2139Sjp161948 1324*2139Sjp161948.encrypt2.finish: 1325*2139Sjp161948 1326*2139Sjp161948 ! rotate 1327*2139Sjp161948 sll in5, 29, in0 1328*2139Sjp161948 srl in5, 3, in5 1329*2139Sjp161948 sll out5, 29, in1 1330*2139Sjp161948 add in5, in0, in5 1331*2139Sjp161948 srl out5, 3, out5 1332*2139Sjp161948 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 1333*2139Sjp161948 add out5, in1, out5 1334*2139Sjp161948 st out5, [in0] 1335*2139Sjp161948 st in5, [in0+4] 1336*2139Sjp161948 1337*2139Sjp161948 ret 1338*2139Sjp161948 restore 1339*2139Sjp161948 1340*2139Sjp161948.DES_encrypt2.end: 1341*2139Sjp161948 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2 1342*2139Sjp161948 1343*2139Sjp161948 1344*2139Sjp161948! void DES_encrypt3(data, ks1, ks2, ks3) 1345*2139Sjp161948! 
************************************** 1346*2139Sjp161948 1347*2139Sjp161948 .align 32 1348*2139Sjp161948 .global DES_encrypt3 1349*2139Sjp161948 .type DES_encrypt3,#function 1350*2139Sjp161948 1351*2139Sjp161948DES_encrypt3: 1352*2139Sjp161948 1353*2139Sjp161948 save %sp, FRAME, %sp 1354*2139Sjp161948 1355*2139Sjp161948 call .PIC.me.up 1356*2139Sjp161948 mov .PIC.me.up-(.-4),out0 1357*2139Sjp161948 1358*2139Sjp161948 ld [in0], in5 ! left 1359*2139Sjp161948 add in2, 120, in4 ! ks2 1360*2139Sjp161948 1361*2139Sjp161948 ld [in0+4], out5 ! right 1362*2139Sjp161948 mov in3, in2 ! save ks3 1363*2139Sjp161948 1364*2139Sjp161948 ! parameter 6 1/2 for include encryption/decryption 1365*2139Sjp161948 ! parameter 7 1 for mov in1 to in3 1366*2139Sjp161948 ! parameter 8 1 for mov in3 to in4 1367*2139Sjp161948 ! parameter 9 1 for load ks3 and ks2 to in4 and in3 1368*2139Sjp161948 1369*2139Sjp161948 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) 1370*2139Sjp161948 1371*2139Sjp161948 call .des_dec 1372*2139Sjp161948 mov in2, in3 ! preload ks3 1373*2139Sjp161948 1374*2139Sjp161948 call .des_enc 1375*2139Sjp161948 nop 1376*2139Sjp161948 1377*2139Sjp161948 fp_macro(in5, out5, 1) 1378*2139Sjp161948 1379*2139Sjp161948 ret 1380*2139Sjp161948 restore 1381*2139Sjp161948 1382*2139Sjp161948.DES_encrypt3.end: 1383*2139Sjp161948 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3 1384*2139Sjp161948 1385*2139Sjp161948 1386*2139Sjp161948! void DES_decrypt3(data, ks1, ks2, ks3) 1387*2139Sjp161948! ************************************** 1388*2139Sjp161948 1389*2139Sjp161948 .align 32 1390*2139Sjp161948 .global DES_decrypt3 1391*2139Sjp161948 .type DES_decrypt3,#function 1392*2139Sjp161948 1393*2139Sjp161948DES_decrypt3: 1394*2139Sjp161948 1395*2139Sjp161948 save %sp, FRAME, %sp 1396*2139Sjp161948 1397*2139Sjp161948 call .PIC.me.up 1398*2139Sjp161948 mov .PIC.me.up-(.-4),out0 1399*2139Sjp161948 1400*2139Sjp161948 ld [in0], in5 ! left 1401*2139Sjp161948 add in3, 120, in4 ! 
ks3 1402*2139Sjp161948 1403*2139Sjp161948 ld [in0+4], out5 ! right 1404*2139Sjp161948 mov in2, in3 ! ks2 1405*2139Sjp161948 1406*2139Sjp161948 ! parameter 6 1/2 for include encryption/decryption 1407*2139Sjp161948 ! parameter 7 1 for mov in1 to in3 1408*2139Sjp161948 ! parameter 8 1 for mov in3 to in4 1409*2139Sjp161948 ! parameter 9 1 for load ks3 and ks2 to in4 and in3 1410*2139Sjp161948 1411*2139Sjp161948 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) 1412*2139Sjp161948 1413*2139Sjp161948 call .des_enc 1414*2139Sjp161948 add in1, 120, in4 ! preload ks1 1415*2139Sjp161948 1416*2139Sjp161948 call .des_dec 1417*2139Sjp161948 nop 1418*2139Sjp161948 1419*2139Sjp161948 fp_macro(out5, in5, 1) 1420*2139Sjp161948 1421*2139Sjp161948 ret 1422*2139Sjp161948 restore 1423*2139Sjp161948 1424*2139Sjp161948.DES_decrypt3.end: 1425*2139Sjp161948 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 1426*2139Sjp161948 1427*2139Sjp161948 .align 256 1428*2139Sjp161948 .type .des_and,#object 1429*2139Sjp161948 .size .des_and,288 ! 256-byte AND table + 8 words at offsets 256..284 = 288 bytes 1430*2139Sjp161948 1431*2139Sjp161948.des_and: 1432*2139Sjp161948 1433*2139Sjp161948! This table is used for AND 0xFC when it is known that register 1434*2139Sjp161948! bits 8-31 are zero. Makes it possible to do three arithmetic 1435*2139Sjp161948! operations in one cycle. 
1436*2139Sjp161948 1437*2139Sjp161948 .byte 0, 0, 0, 0, 4, 4, 4, 4 1438*2139Sjp161948 .byte 8, 8, 8, 8, 12, 12, 12, 12 1439*2139Sjp161948 .byte 16, 16, 16, 16, 20, 20, 20, 20 1440*2139Sjp161948 .byte 24, 24, 24, 24, 28, 28, 28, 28 1441*2139Sjp161948 .byte 32, 32, 32, 32, 36, 36, 36, 36 1442*2139Sjp161948 .byte 40, 40, 40, 40, 44, 44, 44, 44 1443*2139Sjp161948 .byte 48, 48, 48, 48, 52, 52, 52, 52 1444*2139Sjp161948 .byte 56, 56, 56, 56, 60, 60, 60, 60 1445*2139Sjp161948 .byte 64, 64, 64, 64, 68, 68, 68, 68 1446*2139Sjp161948 .byte 72, 72, 72, 72, 76, 76, 76, 76 1447*2139Sjp161948 .byte 80, 80, 80, 80, 84, 84, 84, 84 1448*2139Sjp161948 .byte 88, 88, 88, 88, 92, 92, 92, 92 1449*2139Sjp161948 .byte 96, 96, 96, 96, 100, 100, 100, 100 1450*2139Sjp161948 .byte 104, 104, 104, 104, 108, 108, 108, 108 1451*2139Sjp161948 .byte 112, 112, 112, 112, 116, 116, 116, 116 1452*2139Sjp161948 .byte 120, 120, 120, 120, 124, 124, 124, 124 1453*2139Sjp161948 .byte 128, 128, 128, 128, 132, 132, 132, 132 1454*2139Sjp161948 .byte 136, 136, 136, 136, 140, 140, 140, 140 1455*2139Sjp161948 .byte 144, 144, 144, 144, 148, 148, 148, 148 1456*2139Sjp161948 .byte 152, 152, 152, 152, 156, 156, 156, 156 1457*2139Sjp161948 .byte 160, 160, 160, 160, 164, 164, 164, 164 1458*2139Sjp161948 .byte 168, 168, 168, 168, 172, 172, 172, 172 1459*2139Sjp161948 .byte 176, 176, 176, 176, 180, 180, 180, 180 1460*2139Sjp161948 .byte 184, 184, 184, 184, 188, 188, 188, 188 1461*2139Sjp161948 .byte 192, 192, 192, 192, 196, 196, 196, 196 1462*2139Sjp161948 .byte 200, 200, 200, 200, 204, 204, 204, 204 1463*2139Sjp161948 .byte 208, 208, 208, 208, 212, 212, 212, 212 1464*2139Sjp161948 .byte 216, 216, 216, 216, 220, 220, 220, 220 1465*2139Sjp161948 .byte 224, 224, 224, 224, 228, 228, 228, 228 1466*2139Sjp161948 .byte 232, 232, 232, 232, 236, 236, 236, 236 1467*2139Sjp161948 .byte 240, 240, 240, 240, 244, 244, 244, 244 1468*2139Sjp161948 .byte 248, 248, 248, 248, 252, 252, 252, 252 1469*2139Sjp161948 1470*2139Sjp161948 ! 
5 numbers for initial/final permutation 1471*2139Sjp161948 1472*2139Sjp161948 .word 0x0f0f0f0f ! offset 256 1473*2139Sjp161948 .word 0x0000ffff ! 260 1474*2139Sjp161948 .word 0x33333333 ! 264 1475*2139Sjp161948 .word 0x00ff00ff ! 268 1476*2139Sjp161948 .word 0x55555555 ! 272 1477*2139Sjp161948 1478*2139Sjp161948 .word 0 ! 276 1479*2139Sjp161948 .word LOOPS ! 280 1480*2139Sjp161948 .word 0x0000FC00 ! 284 1481*2139Sjp161948.PIC.DES_SPtrans: 1482*2139Sjp161948 .word %r_disp32(DES_SPtrans) 1483*2139Sjp161948 1484*2139Sjp161948! input: out0 offset between .PIC.me.up and caller 1485*2139Sjp161948! output: out0 pointer to .PIC.me.up 1486*2139Sjp161948! out2 pointer to .des_and 1487*2139Sjp161948! global1 pointer to DES_SPtrans 1488*2139Sjp161948 .align 32 1489*2139Sjp161948.PIC.me.up: 1490*2139Sjp161948 add out0,%o7,out0 ! pointer to .PIC.me.up 1491*2139Sjp161948#if 1 1492*2139Sjp161948 ld [out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1 1493*2139Sjp161948 add global1,(.PIC.DES_SPtrans-.PIC.me.up),global1 1494*2139Sjp161948 add global1,out0,global1 1495*2139Sjp161948#else 1496*2139Sjp161948# ifdef OPENSSL_PIC 1497*2139Sjp161948 ! In case anybody wonders why this code is same for both ABI. 1498*2139Sjp161948 ! To start with it is not. Do note LDPTR below. But of course 1499*2139Sjp161948 ! you must be wondering why the rest of it does not contain 1500*2139Sjp161948 ! things like %hh, %hm and %lm. Well, those are needed only 1501*2139Sjp161948 ! if OpenSSL library *itself* will become larger than 4GB, 1502*2139Sjp161948 ! which is not going to happen any time soon. 1503*2139Sjp161948 sethi %hi(DES_SPtrans),global1 1504*2139Sjp161948 or global1,%lo(DES_SPtrans),global1 1505*2139Sjp161948 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 1506*2139Sjp161948 add global1,out0,global1 1507*2139Sjp161948 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 1508*2139Sjp161948 LDPTR [out2+global1],global1 1509*2139Sjp161948# elif 0 1510*2139Sjp161948 setn DES_SPtrans,out2,global1 ! 
synthetic instruction ! 1511*2139Sjp161948# elif defined(ABI64) 1512*2139Sjp161948 sethi %hh(DES_SPtrans),out2 1513*2139Sjp161948 or out2,%hm(DES_SPtrans),out2 1514*2139Sjp161948 sethi %lm(DES_SPtrans),global1 1515*2139Sjp161948 or global1,%lo(DES_SPtrans),global1 1516*2139Sjp161948 sllx out2,32,out2 1517*2139Sjp161948 or out2,global1,global1 1518*2139Sjp161948# else 1519*2139Sjp161948 sethi %hi(DES_SPtrans),global1 1520*2139Sjp161948 or global1,%lo(DES_SPtrans),global1 1521*2139Sjp161948# endif 1522*2139Sjp161948#endif 1523*2139Sjp161948 retl 1524*2139Sjp161948 add out0,.des_and-.PIC.me.up,out2 1525*2139Sjp161948 1526*2139Sjp161948! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) 1527*2139Sjp161948! ***************************************************************** 1528*2139Sjp161948 1529*2139Sjp161948 1530*2139Sjp161948 .align 32 1531*2139Sjp161948 .global DES_ncbc_encrypt 1532*2139Sjp161948 .type DES_ncbc_encrypt,#function 1533*2139Sjp161948 1534*2139Sjp161948DES_ncbc_encrypt: 1535*2139Sjp161948 1536*2139Sjp161948 save %sp, FRAME, %sp 1537*2139Sjp161948 1538*2139Sjp161948 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) 1539*2139Sjp161948 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) 1540*2139Sjp161948 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) 1541*2139Sjp161948 1542*2139Sjp161948 call .PIC.me.up 1543*2139Sjp161948 mov .PIC.me.up-(.-4),out0 1544*2139Sjp161948 1545*2139Sjp161948 cmp in5, 0 ! enc 1546*2139Sjp161948 1547*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1548*2139Sjp161948 be,pn %icc, .ncbc.dec 1549*2139Sjp161948#else 1550*2139Sjp161948 be .ncbc.dec 1551*2139Sjp161948#endif 1552*2139Sjp161948 STPTR in4, IVEC 1553*2139Sjp161948 1554*2139Sjp161948 ! addr left right temp label 1555*2139Sjp161948 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv 1556*2139Sjp161948 1557*2139Sjp161948 addcc in2, -8, in2 ! 
bytes missing when first block done 1558*2139Sjp161948 1559*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1560*2139Sjp161948 bl,pn %icc, .ncbc.enc.seven.or.less 1561*2139Sjp161948#else 1562*2139Sjp161948 bl .ncbc.enc.seven.or.less 1563*2139Sjp161948#endif 1564*2139Sjp161948 mov in3, in4 ! schedule 1565*2139Sjp161948 1566*2139Sjp161948.ncbc.enc.next.block: 1567*2139Sjp161948 1568*2139Sjp161948 load_little_endian(in0, out4, global4, local3, .LLE2) ! block 1569*2139Sjp161948 1570*2139Sjp161948.ncbc.enc.next.block_1: 1571*2139Sjp161948 1572*2139Sjp161948 xor in5, out4, in5 ! iv xor 1573*2139Sjp161948 xor out5, global4, out5 ! iv xor 1574*2139Sjp161948 1575*2139Sjp161948 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 1576*2139Sjp161948 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2) 1577*2139Sjp161948 1578*2139Sjp161948.ncbc.enc.next.block_2: 1579*2139Sjp161948 1580*2139Sjp161948!// call .des_enc ! compares in2 to 8 1581*2139Sjp161948! rounds inlined for alignment purposes 1582*2139Sjp161948 1583*2139Sjp161948 add global1, 768, global4 ! address sbox 4 since register used below 1584*2139Sjp161948 1585*2139Sjp161948 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3 1586*2139Sjp161948 1587*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1588*2139Sjp161948 bl,pn %icc, .ncbc.enc.next.block_fp 1589*2139Sjp161948#else 1590*2139Sjp161948 bl .ncbc.enc.next.block_fp 1591*2139Sjp161948#endif 1592*2139Sjp161948 add in0, 8, in0 ! input address 1593*2139Sjp161948 1594*2139Sjp161948 ! If 8 or more bytes are to be encrypted after this block, 1595*2139Sjp161948 ! we combine final permutation for this block with initial 1596*2139Sjp161948 ! permutation for next block. Load next block: 1597*2139Sjp161948 1598*2139Sjp161948 load_little_endian(in0, global3, global4, local5, .LLE12) 1599*2139Sjp161948 1600*2139Sjp161948 ! parameter 1 original left 1601*2139Sjp161948 ! parameter 2 original right 1602*2139Sjp161948 ! 
parameter 3 left ip 1603*2139Sjp161948 ! parameter 4 right ip 1604*2139Sjp161948 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 1605*2139Sjp161948 ! 2: mov in4 to in3 1606*2139Sjp161948 ! 1607*2139Sjp161948 ! also adds -8 to length in2 and loads loop counter to out4 1608*2139Sjp161948 1609*2139Sjp161948 fp_ip_macro(out0, out1, global3, global4, 2) 1610*2139Sjp161948 1611*2139Sjp161948 store_little_endian(in1, out0, out1, local3, .SLE10) ! block 1612*2139Sjp161948 1613*2139Sjp161948 ld [in3], out0 ! key 7531 first round next block 1614*2139Sjp161948 mov in5, local1 1615*2139Sjp161948 xor global3, out5, in5 ! iv xor next block 1616*2139Sjp161948 1617*2139Sjp161948 ld [in3+4], out1 ! key 8642 1618*2139Sjp161948 add global1, 512, global3 ! address sbox 3 since register used 1619*2139Sjp161948 xor global4, local1, out5 ! iv xor next block 1620*2139Sjp161948 1621*2139Sjp161948 ba .ncbc.enc.next.block_2 1622*2139Sjp161948 add in1, 8, in1 ! output address 1623*2139Sjp161948 1624*2139Sjp161948.ncbc.enc.next.block_fp: 1625*2139Sjp161948 1626*2139Sjp161948 fp_macro(in5, out5) 1627*2139Sjp161948 1628*2139Sjp161948 store_little_endian(in1, in5, out5, local3, .SLE1) ! block 1629*2139Sjp161948 1630*2139Sjp161948 addcc in2, -8, in2 ! bytes missing when next block done 1631*2139Sjp161948 1632*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1633*2139Sjp161948 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 1634*2139Sjp161948#else 1635*2139Sjp161948 bpos .ncbc.enc.next.block 1636*2139Sjp161948#endif 1637*2139Sjp161948 add in1, 8, in1 1638*2139Sjp161948 1639*2139Sjp161948.ncbc.enc.seven.or.less: 1640*2139Sjp161948 1641*2139Sjp161948 cmp in2, -8 1642*2139Sjp161948 1643*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1644*2139Sjp161948 ble,pt %icc, .ncbc.enc.finish 1645*2139Sjp161948#else 1646*2139Sjp161948 ble .ncbc.enc.finish 1647*2139Sjp161948#endif 1648*2139Sjp161948 nop 1649*2139Sjp161948 1650*2139Sjp161948 add in2, 8, local1 ! 
bytes to load 1651*2139Sjp161948 1652*2139Sjp161948 ! addr, length, dest left, dest right, temp, temp2, label, ret label 1653*2139Sjp161948 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) 1654*2139Sjp161948 1655*2139Sjp161948 ! Loads 1 to 7 bytes little endian to global4, out4 1656*2139Sjp161948 1657*2139Sjp161948 1658*2139Sjp161948.ncbc.enc.finish: 1659*2139Sjp161948 1660*2139Sjp161948 LDPTR IVEC, local4 1661*2139Sjp161948 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec 1662*2139Sjp161948 1663*2139Sjp161948 ret 1664*2139Sjp161948 restore 1665*2139Sjp161948 1666*2139Sjp161948 1667*2139Sjp161948.ncbc.dec: 1668*2139Sjp161948 1669*2139Sjp161948 STPTR in0, INPUT 1670*2139Sjp161948 cmp in2, 0 ! length 1671*2139Sjp161948 add in3, 120, in3 1672*2139Sjp161948 1673*2139Sjp161948 LDPTR IVEC, local7 ! ivec 1674*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1675*2139Sjp161948 ble,pn %icc, .ncbc.dec.finish 1676*2139Sjp161948#else 1677*2139Sjp161948 ble .ncbc.dec.finish 1678*2139Sjp161948#endif 1679*2139Sjp161948 mov in3, in4 ! schedule 1680*2139Sjp161948 1681*2139Sjp161948 STPTR in1, OUTPUT 1682*2139Sjp161948 mov in0, local5 ! input 1683*2139Sjp161948 1684*2139Sjp161948 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec 1685*2139Sjp161948 1686*2139Sjp161948.ncbc.dec.next.block: 1687*2139Sjp161948 1688*2139Sjp161948 load_little_endian(local5, in5, out5, local3, .LLE4) ! block 1689*2139Sjp161948 1690*2139Sjp161948 ! parameter 6 1/2 for include encryption/decryption 1691*2139Sjp161948 ! parameter 7 1 for mov in1 to in3 1692*2139Sjp161948 ! parameter 8 1 for mov in3 to in4 1693*2139Sjp161948 1694*2139Sjp161948 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4 1695*2139Sjp161948 1696*2139Sjp161948 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 1697*2139Sjp161948 1698*2139Sjp161948 ! in2 is bytes left to be stored 1699*2139Sjp161948 ! 
in2 is compared to 8 in the rounds 1700*2139Sjp161948 1701*2139Sjp161948 xor out5, in0, out4 ! iv xor 1702*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1703*2139Sjp161948 bl,pn %icc, .ncbc.dec.seven.or.less 1704*2139Sjp161948#else 1705*2139Sjp161948 bl .ncbc.dec.seven.or.less 1706*2139Sjp161948#endif 1707*2139Sjp161948 xor in5, in1, global4 ! iv xor 1708*2139Sjp161948 1709*2139Sjp161948 ! Load ivec next block now, since input and output address might be the same. 1710*2139Sjp161948 1711*2139Sjp161948 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv 1712*2139Sjp161948 1713*2139Sjp161948 store_little_endian(local7, out4, global4, local3, .SLE3) 1714*2139Sjp161948 1715*2139Sjp161948 STPTR local5, INPUT 1716*2139Sjp161948 add local7, 8, local7 1717*2139Sjp161948 addcc in2, -8, in2 1718*2139Sjp161948 1719*2139Sjp161948#ifdef OPENSSL_SYSNAME_ULTRASPARC 1720*2139Sjp161948 bg,pt %icc, .ncbc.dec.next.block 1721*2139Sjp161948#else 1722*2139Sjp161948 bg .ncbc.dec.next.block 1723*2139Sjp161948#endif 1724*2139Sjp161948 STPTR local7, OUTPUT 1725*2139Sjp161948 1726*2139Sjp161948 1727*2139Sjp161948.ncbc.dec.store.iv: 1728*2139Sjp161948 1729*2139Sjp161948 LDPTR IVEC, local4 ! ivec 1730*2139Sjp161948 store_little_endian(local4, in0, in1, local5, .SLE4) 1731*2139Sjp161948 1732*2139Sjp161948.ncbc.dec.finish: 1733*2139Sjp161948 1734*2139Sjp161948 ret 1735*2139Sjp161948 restore 1736*2139Sjp161948 1737*2139Sjp161948.ncbc.dec.seven.or.less: 1738*2139Sjp161948 1739*2139Sjp161948 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec 1740*2139Sjp161948 1741*2139Sjp161948 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) 1742*2139Sjp161948 1743*2139Sjp161948 1744*2139Sjp161948.DES_ncbc_encrypt.end: 1745*2139Sjp161948 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt 1746*2139Sjp161948 1747*2139Sjp161948 1748*2139Sjp161948! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc) 1749*2139Sjp161948! 
! **************************************************************************
!
! Triple DES (encrypt with ks1, decrypt with ks2, encrypt with ks3) in
! CBC mode.  Called as
!
!   void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
!
! Arguments as seen after the save below:
!   in0   input address
!   in1   output address
!   in2   length in bytes
!   in3   ks1, in4  ks2, in5  ks3  (key schedules)
!   ivec and enc are the 7th and 8th argument and are read from the
!   caller frame through %fp.
!
! enc != 0 takes the encryption path, enc == 0 branches to .ede3.dec.
! The last chaining value is written back through ivec on exit, except
! that decryption with length <= 0 returns without touching ivec.
!
! The stack slot macros used below for the input and output address are
! not set up in this function; they come from earlier in the file.
!
! NOTE(review): m4 and cpp scan comment text as well, so the comments
! added here spell the slot macros in lower case and avoid braces.
!
! The code is hand scheduled.  Every branch and call is followed by a
! delay slot instruction that is executed on both paths; keep the
! instruction order as it is.

	.align 32
	.global DES_ede3_cbc_encrypt
	.type	 DES_ede3_cbc_encrypt,#function

DES_ede3_cbc_encrypt:

	save	%sp, FRAME, %sp

	! Slots in the new frame where the three key schedule addresses
	! are parked so they can be reloaded for every block.
	define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
	define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
	define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })

	! Helper defined outside this chunk.  Presumably it leaves the table
	! base in global1, which is used for the sbox addresses further
	! down - TODO confirm against its definition.
	call	.PIC.me.up
	mov	.PIC.me.up-(.-4),out0

	LDPTR	[%fp+BIAS+ARG0+7*ARGSZ], local3          ! enc
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	cmp	local3, 0                         ! enc

	! enc == 0 selects decryption.  The store in the delay slot runs on
	! both paths, so the ks2 slot is always filled in.
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	be,pn	%icc, .ede3.dec
#else
	be	.ede3.dec
#endif
	STPTR	in4, KS2

	! ---- encryption --------------------------------------------------

	STPTR	in5, KS3

	! From here on in5/out5 hold the running chaining value.
	load_little_endian(local4, in5, out5, local3, .LLE6)  ! ivec

	addcc	in2, -8, in2              ! bytes missing after next block

	! Negative result: fewer than 8 bytes in total.  The ks1 slot is
	! filled in by the delay slot on both paths.
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bl,pn	%icc, .ede3.enc.seven.or.less
#else
	bl	.ede3.enc.seven.or.less
#endif
	STPTR	in3, KS1

.ede3.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE7)

	! Also the return point for a partial last block loaded below.
.ede3.enc.next.block_1:

	LDPTR	KS2, in4
	xor	in5, out4, in5            ! iv xor
	xor	out5, global4, out5       ! iv xor

	LDPTR	KS1, in3
	add	in4, 120, in4             ! for decryption we use last subkey first
	nop

	ip_macro(in5, out5, in5, out5, in3)

	! Loop entry when the initial permutation of the block has
	! already been done.
.ede3.enc.next.block_2:

	call	.des_enc                  ! ks1 in3
	nop

	! The delay slot already fetches ks3 into in3 for the third stage.
	call	.des_dec                  ! ks2 in4
	LDPTR	KS3, in3

	call	.des_enc                  ! ks3 in3  compares in2 to 8
	nop

	! The condition codes tested here come from that compare of in2 to 8.
	! Less than 8: no further full block, do a plain final permutation.
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bl,pn	%icc, .ede3.enc.next.block_fp
#else
	bl	.ede3.enc.next.block_fp
#endif
	add	in0, 8, in0

	! If 8 or more bytes are to be encrypted after this block,
	! we combine final permutation for this block with initial
	! permutation for next block. Load next block:

	load_little_endian(in0, global3, global4, local5, .LLE11)

	!  parameter 1   original left
	!  parameter 2   original right
	!  parameter 3   left ip
	!  parameter 4   right ip
	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
	!                2: mov in4 to in3
	!
	! also adds -8 to length in2 and loads loop counter to out4

	fp_ip_macro(out0, out1, global3, global4, 1)

	store_little_endian(in1, out0, out1, local3, .SLE9)  ! block

	mov	in5, local1
	xor	global3, out5, in5        ! iv xor next block

	ld	[in3], out0               ! key 7531
	add	global1, 512, global3     ! address sbox 3
	xor	global4, local1, out5     ! iv xor next block

	ld	[in3+4], out1             ! key 8642
	add	global1, 768, global4     ! address sbox 4
	ba	.ede3.enc.next.block_2
	add	in1, 8, in1

.ede3.enc.next.block_fp:

	fp_macro(in5, out5)

	store_little_endian(in1, in5, out5, local3, .SLE5)  ! block

	addcc	in2, -8, in2              ! bytes missing when next block done

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bpos,pt	%icc, .ede3.enc.next.block
#else
	bpos	.ede3.enc.next.block
#endif
	add	in1, 8, in1

.ede3.enc.seven.or.less:

	! in2 + 8 is the number of bytes still to be encrypted.
	! Nothing is left when in2 <= -8.
	cmp	in2, -8

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	ble,pt	%icc, .ede3.enc.finish
#else
	ble	.ede3.enc.finish
#endif
	nop

	add	in2, 8, local1            ! bytes to load

	! addr, length, dest left, dest right, temp, temp2, label, ret label
	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)

.ede3.enc.finish:

	! Write the last chaining value (in5/out5) back through ivec.
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	store_little_endian(local4, in5, out5, local5, .SLE6)  ! ivec

	ret
	restore

	! ---- decryption --------------------------------------------------

.ede3.dec:

	! ks3 and ks1 are used by the decrypt rounds, so both are advanced
	! by 120 bytes before they are parked (last subkey first).
	! in0/in1 are reused for the chaining value below; the input and
	! output address are therefore saved to their stack slots and the
	! input address is kept in local5.
	STPTR	in0, INPUT
	add	in5, 120, in5

	STPTR	in1, OUTPUT
	mov	in0, local5
	add	in3, 120, in3

	STPTR	in3, KS1
	cmp	in2, 0

	! length <= 0: return at once, ivec is left untouched.
	! This is the unlikely case.  Without a hint the branch would be
	! assembled as predicted taken, so it is marked not taken here,
	! the same as the length check at the ncbc decrypt entry.
	! The ks3 slot is filled in by the delay slot on both paths.
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	ble,pn	%icc, .ede3.dec.finish
#else
	ble	.ede3.dec.finish
#endif
	STPTR	in5, KS3

	! in0/in1 hold the chaining value for the block being decrypted.
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local7          ! iv
	load_little_endian(local7, in0, in1, local3, .LLE8)

.ede3.dec.next.block:

	load_little_endian(local5, in5, out5, local3, .LLE9)

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4

	! The delay slot already fetches ks1 into in4 for the last stage.
	call	.des_enc                  ! ks2 in3
	LDPTR	KS1, in4

	call	.des_dec                  ! ks1 in4
	nop

	fp_macro(out5, in5, 0, 1)   ! 1 for input and output address local5/7

	! in2 is bytes left to be stored
	! in2 is compared to 8 in the rounds

	! Both xor instructions run on either path; the result of the
	! iv xor ends up in out4/global4.
	xor	out5, in0, out4
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bl,pn	%icc, .ede3.dec.seven.or.less
#else
	bl	.ede3.dec.seven.or.less
#endif
	xor	in5, in1, global4

	! Load the chaining value for the next block from the input now,
	! before the store below: input and output address may be the same.
	load_little_endian_inc(local5, in0, in1, local3, .LLE10)   ! iv next block

	store_little_endian(local7, out4, global4, local3, .SLE7)  ! block

	! Save the updated input and output address back to their slots.
	STPTR	local5, INPUT
	addcc	in2, -8, in2
	add	local7, 8, local7

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bg,pt	%icc, .ede3.dec.next.block
#else
	bg	.ede3.dec.next.block
#endif
	STPTR	local7, OUTPUT

.ede3.dec.store.iv:

	! Write the chaining value in in0/in1 back through ivec.
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	store_little_endian(local4, in0, in1, local5, .SLE8)  ! ivec

.ede3.dec.finish:

	ret
	restore

.ede3.dec.seven.or.less:

	! Partial last block: pick up the chaining value to hand back,
	! store only the in2 bytes that are left, then continue at
	! .ede3.dec.store.iv (presumably the last macro argument is the
	! label to continue at - TODO confirm against the macro).
	load_little_endian_inc(local5, in0, in1, local3, .LLE14)     ! iv

	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)


.DES_ede3_cbc_encrypt.end:
	.size	 DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt