xref: /plan9-contrib/sys/src/libsec/amd64/sha1block.s (revision 42bf527c5f37ca8e61cd3940c8ea66d190badf15)
1*42bf527cSDavid du Colombier/* x = (wp[off-3] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
2*42bf527cSDavid du Colombier * wp[off] = x;
3*42bf527cSDavid du Colombier * x += A <<< 5;
4*42bf527cSDavid du Colombier * E += V + x;	(V is the macro's constant argument, e.g. 0xca62c1d6)
5*42bf527cSDavid du Colombier * x = FN(B,C,D);
6*42bf527cSDavid du Colombier * E += x;
7*42bf527cSDavid du Colombier * B >>> 2
8*42bf527cSDavid du Colombier */
/*
 * BSWAPDI emits the two opcode bytes 0x0f 0xcf directly (the x86 encoding
 * of BSWAP on EDI).  BODY0 applies it to DI right after loading a 32-bit
 * word from the input, before that word is stored into w[].
 */
9*42bf527cSDavid du Colombier#define BSWAPDI	BYTE $0x0f; BYTE $0xcf;
10*42bf527cSDavid du Colombier
/*
 * BODY: one round in which the schedule word is derived from earlier words.
 *	off	byte offset, relative to BP, of the word to produce
 *	FN	macro that leaves its function of (B,C,D) in DI
 *	V	constant added into E
 *	A..E	operands holding the five working values for this round
 * The new word is the XOR of the words at off-64, off-56, off-32 and off-12
 * from BP, rotated left by 1; it is stored at off(BP) and then added,
 * together with V, into E.  A rotated left by 5 and the FN result are then
 * added into E, and B is rotated right by 2 in place.
 * DI is used as scratch throughout and is overwritten.
 */
11*42bf527cSDavid du Colombier#define BODY(off,FN,V,A,B,C,D,E)\
12*42bf527cSDavid du Colombier	MOVL (off-64)(BP),DI;\
13*42bf527cSDavid du Colombier	XORL (off-56)(BP),DI;\
14*42bf527cSDavid du Colombier	XORL (off-32)(BP),DI;\
15*42bf527cSDavid du Colombier	XORL (off-12)(BP),DI;\
16*42bf527cSDavid du Colombier	ROLL $1,DI;\
17*42bf527cSDavid du Colombier	MOVL DI,off(BP);\
18*42bf527cSDavid du Colombier	LEAL V(DI)(E*1),E;\
19*42bf527cSDavid du Colombier	MOVL A,DI;\
20*42bf527cSDavid du Colombier	ROLL $5,DI;\
21*42bf527cSDavid du Colombier	ADDL DI,E;\
22*42bf527cSDavid du Colombier	FN(B,C,D)\
23*42bf527cSDavid du Colombier	ADDL DI,E;\
24*42bf527cSDavid du Colombier	RORL $2,B;\
25*42bf527cSDavid du Colombier
/*
 * BODY0: one round in which the schedule word comes straight from the input.
 * Takes the same arguments as BODY.  The 32-bit word at off(BX) is loaded
 * into DI, byte-swapped with BSWAPDI, and stored at off(BP); the remainder
 * (adding V, A <<< 5 and the FN result into E, then rotating B right by 2)
 * is the same instruction sequence as in BODY.
 * BX is the input pointer here, so it cannot also be passed as one of A..E.
 * DI is used as scratch throughout and is overwritten.
 */
26*42bf527cSDavid du Colombier#define BODY0(off,FN,V,A,B,C,D,E)\
27*42bf527cSDavid du Colombier	MOVLQZX off(BX),DI;\
28*42bf527cSDavid du Colombier	BSWAPDI;\
29*42bf527cSDavid du Colombier	MOVL DI,off(BP);\
30*42bf527cSDavid du Colombier	LEAL V(DI)(E*1),E;\
31*42bf527cSDavid du Colombier	MOVL A,DI;\
32*42bf527cSDavid du Colombier	ROLL $5,DI;\
33*42bf527cSDavid du Colombier	ADDL DI,E;\
34*42bf527cSDavid du Colombier	FN(B,C,D)\
35*42bf527cSDavid du Colombier	ADDL DI,E;\
36*42bf527cSDavid du Colombier	RORL $2,B;\
37*42bf527cSDavid du Colombier
38*42bf527cSDavid du Colombier/*
39*42bf527cSDavid du Colombier * fn1 = (((C^D)&B)^D);
 * The result is left in DI.  B, C and D are only read, so any of them
 * may be a memory operand (loop1 passes a stack slot for one of them).
40*42bf527cSDavid du Colombier */
41*42bf527cSDavid du Colombier#define FN1(B,C,D)\
42*42bf527cSDavid du Colombier	MOVL C,DI;\
43*42bf527cSDavid du Colombier	XORL D,DI;\
44*42bf527cSDavid du Colombier	ANDL B,DI;\
45*42bf527cSDavid du Colombier	XORL D,DI;\
46*42bf527cSDavid du Colombier
47*42bf527cSDavid du Colombier/*
48*42bf527cSDavid du Colombier * fn24 = B ^ C ^ D
 * Used by both loop2 and loop4 (with different constants).
 * The result is left in DI; B, C and D are only read.
49*42bf527cSDavid du Colombier */
50*42bf527cSDavid du Colombier#define FN24(B,C,D)\
51*42bf527cSDavid du Colombier	MOVL B,DI;\
52*42bf527cSDavid du Colombier	XORL C,DI;\
53*42bf527cSDavid du Colombier	XORL D,DI;\
54*42bf527cSDavid du Colombier
55*42bf527cSDavid du Colombier/*
56*42bf527cSDavid du Colombier * fn3 = ((B ^ C) & (D ^= B)) ^ B
57*42bf527cSDavid du Colombier * D ^= B to restore D
 * The result is left in DI.  D is written twice (modified, then restored
 * by the second XOR with B), so it holds its original value on exit.
58*42bf527cSDavid du Colombier */
59*42bf527cSDavid du Colombier#define FN3(B,C,D)\
60*42bf527cSDavid du Colombier	MOVL B,DI;\
61*42bf527cSDavid du Colombier	XORL C,DI;\
62*42bf527cSDavid du Colombier	XORL B,D;\
63*42bf527cSDavid du Colombier	ANDL D,DI;\
64*42bf527cSDavid du Colombier	XORL B,DI;\
65*42bf527cSDavid du Colombier	XORL B,D;\
66*42bf527cSDavid du Colombier
67*42bf527cSDavid du Colombier/*
68*42bf527cSDavid du Colombier * stack offsets
69*42bf527cSDavid du Colombier * void sha1block(uchar *DATA, int LEN, ulong *STATE)
 * These are byte offsets used with FP to reach the arguments.
 * LEN and STATE are read that way in _sha1block; DATA is not referenced
 * there, because the first argument is taken from RARG instead.
70*42bf527cSDavid du Colombier */
71*42bf527cSDavid du Colombier#define	DATA	0
72*42bf527cSDavid du Colombier#define	LEN	8
73*42bf527cSDavid du Colombier#define	STATE	16
74*42bf527cSDavid du Colombier
75*42bf527cSDavid du Colombier/*
76*42bf527cSDavid du Colombier * stack offsets for locals
77*42bf527cSDavid du Colombier * ulong w[80];
78*42bf527cSDavid du Colombier * uchar *edata;
79*42bf527cSDavid du Colombier * ulong *w15, *w40, *w60, *w80;
80*42bf527cSDavid du Colombier * register local
81*42bf527cSDavid du Colombier * ulong *wp = BP
82*42bf527cSDavid du Colombier * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
83*42bf527cSDavid du Colombier * ulong tmp = edi
84*42bf527cSDavid du Colombier */
/*
 * Rpdata carries the input pointer from one block to the next.
 * The remaining names are negative byte offsets used with SP:
 *	WARRAY		base of w[] (80 words of 4 bytes)
 *	TMP1, TMP2	spill slots for working values during the rounds
 *			in which BX is in use as the input pointer
 *	W15..W80	slots holding the addresses of w[15], w[40],
 *			w[60] and w[80], used as loop bounds
 *	EDATA		slot holding the end-of-input address
 * The slots are spaced 8 bytes apart, the lowest being at -64-(80*4) = -384,
 * which matches the $384 frame size on the TEXT line.
 */
85*42bf527cSDavid du Colombier#define	Rpdata	R8
86*42bf527cSDavid du Colombier#define WARRAY	(-8-(80*4))
87*42bf527cSDavid du Colombier#define TMP1	(-16-(80*4))
88*42bf527cSDavid du Colombier#define TMP2	(-24-(80*4))
89*42bf527cSDavid du Colombier#define W15	(-32-(80*4))
90*42bf527cSDavid du Colombier#define W40	(-40-(80*4))
91*42bf527cSDavid du Colombier#define W60	(-48-(80*4))
92*42bf527cSDavid du Colombier#define W80	(-56-(80*4))
93*42bf527cSDavid du Colombier#define EDATA	(-64-(80*4))
94*42bf527cSDavid du Colombier
/*
 * _sha1block: run the block function over the input, 64 bytes per pass of
 * mainloop, adding the result of each pass into the five 32-bit words
 * pointed to by the state argument.
 *
 * The first argument (input pointer) is taken from RARG; len and state are
 * read from the frame.  len is loaded as a 32-bit value and zero-extended.
 *
 * NOTE(review): the end-of-input test is at the bottom of mainloop, so one
 * full block is always processed before len is consulted, and each pass
 * reads exactly 64 input bytes.  Callers presumably pass a non-zero
 * multiple of 64 -- confirm against the C caller.
 */
95*42bf527cSDavid du ColombierTEXT	_sha1block+0(SB),$384
96*42bf527cSDavid du Colombier
	/* Rpdata = data; edata = data + len */
97*42bf527cSDavid du Colombier	MOVQ RARG, Rpdata
98*42bf527cSDavid du Colombier	MOVLQZX len+LEN(FP),BX
99*42bf527cSDavid du Colombier	ADDQ BX, RARG
100*42bf527cSDavid du Colombier	MOVQ RARG,edata+EDATA(SP)
101*42bf527cSDavid du Colombier
	/* precompute &w[15], &w[40], &w[60], &w[80] as loop bounds */
102*42bf527cSDavid du Colombier	LEAQ aw15+(WARRAY+15*4)(SP),DI
103*42bf527cSDavid du Colombier	MOVQ DI,w15+W15(SP)
104*42bf527cSDavid du Colombier	LEAQ aw40+(WARRAY+40*4)(SP),DX
105*42bf527cSDavid du Colombier	MOVQ DX,w40+W40(SP)
106*42bf527cSDavid du Colombier	LEAQ aw60+(WARRAY+60*4)(SP),CX
107*42bf527cSDavid du Colombier	MOVQ CX,w60+W60(SP)
108*42bf527cSDavid du Colombier	LEAQ aw80+(WARRAY+80*4)(SP),DI
109*42bf527cSDavid du Colombier	MOVQ DI,w80+W80(SP)
110*42bf527cSDavid du Colombier
	/* one pass per 64-byte block */
111*42bf527cSDavid du Colombiermainloop:
	/* BP = &w[0] */
112*42bf527cSDavid du Colombier	LEAQ warray+WARRAY(SP),BP
113*42bf527cSDavid du Colombier
	/*
	 * Load the five state words into AX, tmp1, CX, DX, SI.  The second
	 * word goes to the tmp1 stack slot (via BX) because BX is about to
	 * become the input pointer.
	 */
114*42bf527cSDavid du Colombier	MOVQ state+STATE(FP),DI
115*42bf527cSDavid du Colombier	MOVL (DI),AX
116*42bf527cSDavid du Colombier	MOVL 4(DI),BX
117*42bf527cSDavid du Colombier	MOVL BX,tmp1+TMP1(SP)
118*42bf527cSDavid du Colombier	MOVL 8(DI),CX
119*42bf527cSDavid du Colombier	MOVL 12(DI),DX
120*42bf527cSDavid du Colombier	MOVL 16(DI),SI
121*42bf527cSDavid du Colombier
	/* BX = current input position */
122*42bf527cSDavid du Colombier	MOVQ Rpdata,BX
123*42bf527cSDavid du Colombier
	/*
	 * Words 0..14: five BODY0 rounds per pass, three passes (BP advances
	 * 20 bytes per pass until it reaches &w[15]).  With BX taken, one of
	 * the five working values always lives in tmp1 or tmp2; the MOVLs
	 * between rounds shuffle values between SI and those slots so that
	 * the pass ends with the same AX, tmp1, CX, DX, SI layout it began
	 * with.
	 */
124*42bf527cSDavid du Colombierloop1:
125*42bf527cSDavid du Colombier	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
126*42bf527cSDavid du Colombier	MOVL SI,tmp2+TMP2(SP)
127*42bf527cSDavid du Colombier	BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX)
128*42bf527cSDavid du Colombier	MOVL tmp1+TMP1(SP),SI
129*42bf527cSDavid du Colombier	BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX)
130*42bf527cSDavid du Colombier	BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI)
131*42bf527cSDavid du Colombier	MOVL SI,tmp1+TMP1(SP)
132*42bf527cSDavid du Colombier	BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX)
133*42bf527cSDavid du Colombier	MOVL tmp2+TMP2(SP),SI
134*42bf527cSDavid du Colombier
135*42bf527cSDavid du Colombier	ADDQ $20,BX
136*42bf527cSDavid du Colombier	ADDQ $20,BP
137*42bf527cSDavid du Colombier	CMPQ BP,w15+W15(SP)
138*42bf527cSDavid du Colombier	JCS loop1
139*42bf527cSDavid du Colombier
	/*
	 * Word 15 is the last one read from the input.  Afterwards BX is
	 * stepped past it, saved in R8 (which is Rpdata) as the start of the
	 * next block, and then reloaded from tmp1 so that it can serve as a
	 * working register for the remaining rounds.
	 * NOTE(review): the reload is an 8-byte MOVQ from a slot that was only
	 * written with 32-bit operations; every later use of BX in this
	 * routine is a 32-bit instruction, so the upper half looks unused.
	 */
140*42bf527cSDavid du Colombier	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
141*42bf527cSDavid du Colombier	ADDQ $4,BX
142*42bf527cSDavid du Colombier	MOVQ BX,R8
143*42bf527cSDavid du Colombier	MOVQ tmp1+TMP1(SP),BX
144*42bf527cSDavid du Colombier
	/* words 16..19: same FN1 and constant, but words now come from BODY */
145*42bf527cSDavid du Colombier	BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX)
146*42bf527cSDavid du Colombier	BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX)
147*42bf527cSDavid du Colombier	BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX)
148*42bf527cSDavid du Colombier	BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX)
149*42bf527cSDavid du Colombier
150*42bf527cSDavid du Colombier	ADDQ $20,BP
151*42bf527cSDavid du Colombier
	/*
	 * From here on each loop performs five rounds per pass; the register
	 * arguments rotate by one position per round, so after five rounds
	 * the values are back in AX, BX, CX, DX, SI order.
	 */
	/* words 20..39: runs until BP reaches &w[40] */
152*42bf527cSDavid du Colombierloop2:
153*42bf527cSDavid du Colombier	BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI)
154*42bf527cSDavid du Colombier	BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX)
155*42bf527cSDavid du Colombier	BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX)
156*42bf527cSDavid du Colombier	BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX)
157*42bf527cSDavid du Colombier	BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX)
158*42bf527cSDavid du Colombier
159*42bf527cSDavid du Colombier	ADDQ $20,BP
160*42bf527cSDavid du Colombier	CMPQ BP,w40+W40(SP)
161*42bf527cSDavid du Colombier	JCS loop2
162*42bf527cSDavid du Colombier
	/* words 40..59: runs until BP reaches &w[60] */
163*42bf527cSDavid du Colombierloop3:
164*42bf527cSDavid du Colombier	BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI)
165*42bf527cSDavid du Colombier	BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX)
166*42bf527cSDavid du Colombier	BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX)
167*42bf527cSDavid du Colombier	BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX)
168*42bf527cSDavid du Colombier	BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX)
169*42bf527cSDavid du Colombier
170*42bf527cSDavid du Colombier	ADDQ $20,BP
171*42bf527cSDavid du Colombier	CMPQ BP,w60+W60(SP)
172*42bf527cSDavid du Colombier	JCS loop3
173*42bf527cSDavid du Colombier
	/* words 60..79: runs until BP reaches &w[80] */
174*42bf527cSDavid du Colombierloop4:
175*42bf527cSDavid du Colombier	BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI)
176*42bf527cSDavid du Colombier	BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX)
177*42bf527cSDavid du Colombier	BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX)
178*42bf527cSDavid du Colombier	BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX)
179*42bf527cSDavid du Colombier	BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX)
180*42bf527cSDavid du Colombier
181*42bf527cSDavid du Colombier	ADDQ $20,BP
182*42bf527cSDavid du Colombier	CMPQ BP,w80+W80(SP)
183*42bf527cSDavid du Colombier	JCS loop4
184*42bf527cSDavid du Colombier
	/* add the five working values into the caller's state words */
185*42bf527cSDavid du Colombier	MOVQ state+STATE(FP),DI
186*42bf527cSDavid du Colombier	ADDL AX,0(DI)
187*42bf527cSDavid du Colombier	ADDL BX,4(DI)
188*42bf527cSDavid du Colombier	ADDL CX,8(DI)
189*42bf527cSDavid du Colombier	ADDL DX,12(DI)
190*42bf527cSDavid du Colombier	ADDL SI,16(DI)
191*42bf527cSDavid du Colombier
	/* go round again while the input pointer is still below edata */
192*42bf527cSDavid du Colombier	MOVQ edata+EDATA(SP),DI
193*42bf527cSDavid du Colombier	CMPQ Rpdata,DI
194*42bf527cSDavid du Colombier	JCS mainloop
195*42bf527cSDavid du Colombier
196*42bf527cSDavid du Colombier	RET
197*42bf527cSDavid du Colombier	END
198