/*
 * SHA-1 block transform for amd64, Plan 9 assembler syntax.
 *
 *	void sha1block(uchar *data, int len, ulong *state)
 *
 * Consumes the input in 64-byte blocks and folds each block into the
 * five 32-bit words at state[0..4].  The outer loop tests at the
 * bottom, so at least one block is always consumed; callers must pass
 * len as a positive multiple of 64.
 */

/*
 * BODY: one round that also extends the message schedule.
 * Indices below are in 32-bit words relative to wp (BP); the macro
 * itself uses the equivalent byte offsets off-12, off-32, off-56
 * and off-64.
 *
 *	x = (wp[off-3] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
 *	wp[off] = x;
 *	E += V + x;
 *	E += A <<< 5;
 *	E += FN(B,C,D);
 *	B = B >>> 2
 *
 * DI is the scratch register throughout.  FN must leave its result
 * in DI and must end with ';' because it is spliced in mid-line.
 */

/* raw opcode bytes 0F CF: BSWAP on the 32-bit DI register */
#define BSWAPDI	BYTE $0x0f; BYTE $0xcf;

#define BODY(off,FN,V,A,B,C,D,E)\
	MOVL (off-64)(BP),DI;\
	XORL (off-56)(BP),DI;\
	XORL (off-32)(BP),DI;\
	XORL (off-12)(BP),DI;\
	ROLL $1,DI;\
	MOVL DI,off(BP);\
	LEAL V(DI)(E*1),E;\
	MOVL A,DI;\
	ROLL $5,DI;\
	ADDL DI,E;\
	FN(B,C,D)\
	ADDL DI,E;\
	RORL $2,B;\

/*
 * BODY0: same round as BODY, but the schedule word is taken from
 * the input instead of being derived from earlier words:
 *
 *	x = byte-swapped 32-bit word at off(BX);
 *	wp[off] = x;
 *
 * BX is the input pointer here, so it cannot hold a state word.
 */
#define BODY0(off,FN,V,A,B,C,D,E)\
	MOVLQZX off(BX),DI;\
	BSWAPDI;\
	MOVL DI,off(BP);\
	LEAL V(DI)(E*1),E;\
	MOVL A,DI;\
	ROLL $5,DI;\
	ADDL DI,E;\
	FN(B,C,D)\
	ADDL DI,E;\
	RORL $2,B;\

/*
 * fn1 = (((C^D)&B)^D);
 * used for the first 20 rounds
 */
#define FN1(B,C,D)\
	MOVL C,DI;\
	XORL D,DI;\
	ANDL B,DI;\
	XORL D,DI;\

/*
 * fn24 = B ^ C ^ D
 * used for rounds 20-39 and 60-79
 */
#define FN24(B,C,D)\
	MOVL B,DI;\
	XORL C,DI;\
	XORL D,DI;\

/*
 * fn3 = ((B ^ C) & (D ^= B)) ^ B
 * D ^= B to restore D
 * used for rounds 40-59; D is modified in place and then put back
 */
#define FN3(B,C,D)\
	MOVL B,DI;\
	XORL C,DI;\
	XORL B,D;\
	ANDL D,DI;\
	XORL B,DI;\
	XORL B,D;\

/*
 * stack offsets
 * void sha1block(uchar *DATA, int LEN, ulong *STATE)
 */
#define DATA 0
#define LEN 8
#define STATE 16

/*
 * stack offsets for locals
 *	ulong w[80];
 *	uchar *edata;
 *	ulong *w15, *w40, *w60, *w80;
 * register local
 *	ulong *wp = BP
 *	ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
 *	ulong tmp = edi
 *
 * Rpdata (R8) carries the current input pointer from one block to
 * the next.  TMP1 and TMP2 are 32-bit spill slots that hold a state
 * word while BX is busy as the input pointer.
 */
#define Rpdata R8
#define WARRAY (-8-(80*4))
#define TMP1 (-16-(80*4))
#define TMP2 (-24-(80*4))
#define W15 (-32-(80*4))
#define W40 (-40-(80*4))
#define W60 (-48-(80*4))
#define W80 (-56-(80*4))
#define EDATA (-64-(80*4))

TEXT _sha1block+0(SB),$384

	/* RARG is treated as the data pointer: edata = data + len */
	MOVQ RARG, Rpdata
	MOVLQZX len+LEN(FP),BX
	ADDQ BX, RARG
	MOVQ RARG,edata+EDATA(SP)

	/* precompute the addresses of w[15], w[40], w[60], w[80] as loop limits */
	LEAQ aw15+(WARRAY+15*4)(SP),DI
	MOVQ DI,w15+W15(SP)
	LEAQ aw40+(WARRAY+40*4)(SP),DX
	MOVQ DX,w40+W40(SP)
	LEAQ aw60+(WARRAY+60*4)(SP),CX
	MOVQ CX,w60+W60(SP)
	LEAQ aw80+(WARRAY+80*4)(SP),DI
	MOVQ DI,w80+W80(SP)

	/* once per 64-byte block */
mainloop:
	LEAQ warray+WARRAY(SP),BP

	/* load a..e from state; b goes to tmp1 because BX is needed below */
	MOVQ state+STATE(FP),DI
	MOVL (DI),AX
	MOVL 4(DI),BX
	MOVL BX,tmp1+TMP1(SP)
	MOVL 8(DI),CX
	MOVL 12(DI),DX
	MOVL 16(DI),SI

	MOVQ Rpdata,BX

	/*
	 * rounds 0-14, five per pass, three passes (BP runs w[0]..w[15]).
	 * The word that would live in BX moves between tmp1, tmp2 and SI
	 * so that each pass starts with the same register assignment.
	 */
loop1:
	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
	MOVL SI,tmp2+TMP2(SP)
	BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX)
	MOVL tmp1+TMP1(SP),SI
	BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX)
	BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI)
	MOVL SI,tmp1+TMP1(SP)
	BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX)
	MOVL tmp2+TMP2(SP),SI

	ADDQ $20,BX
	ADDQ $20,BP
	CMPQ BP,w15+W15(SP)
	JCS loop1

	/* round 15: the last word taken directly from the input */
	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
	ADDQ $4,BX
	/* input pointer now addresses the next block; free BX for b */
	MOVQ BX,Rpdata
	/* tmp1 is a 32-bit slot (written only by MOVL/RORL): reload with MOVL */
	MOVL tmp1+TMP1(SP),BX

	/* rounds 16-19 */
	BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX)
	BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX)
	BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX)
	BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX)

	ADDQ $20,BP

	/* rounds 20-39 */
loop2:
	BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI)
	BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX)
	BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX)
	BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX)
	BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w40+W40(SP)
	JCS loop2

	/* rounds 40-59 */
loop3:
	BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI)
	BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX)
	BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX)
	BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX)
	BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w60+W60(SP)
	JCS loop3

	/* rounds 60-79 */
loop4:
	BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI)
	BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX)
	BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX)
	BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX)
	BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w80+W80(SP)
	JCS loop4

	/* state[0..4] += a..e */
	MOVQ state+STATE(FP),DI
	ADDL AX,0(DI)
	ADDL BX,4(DI)
	ADDL CX,8(DI)
	ADDL DX,12(DI)
	ADDL SI,16(DI)

	/* continue while the input pointer is below edata */
	MOVQ edata+EDATA(SP),DI
	CMPQ Rpdata,DI
	JCS mainloop

	RET
	END