xref: /plan9-contrib/sys/src/libsec/spim/sha1block.s (revision 12b1df16f1a9233fb47d4260d51ef450cc19c368)
/*
 * _sha1block: SHA-1 compression routine.
 * The frame size of 328 bytes leaves room for the 80-word (320-byte)
 * message schedule that the code addresses as warray+WARRAY(SP).
 * The macros and register aliases that follow are used by the
 * instruction stream of this routine.
 */
1*12b1df16SDavid du Colombier	TEXT	_sha1block+0(SB),$328
2*12b1df16SDavid du Colombier
3*12b1df16SDavid du Colombier/*
 * BODYX(off,FN,V,A,B,C,D,E): the common part of one round.
 * On entry TMP2 holds the message word x for this round.
 *
 * In effect:
 *	E += FN(B,C,D) + V + x + (A <<< 5);
 *	wp[off] = x;		(off is a byte offset from WREG)
 *	B = B <<< 30;		(the same as B >>> 2)
 *
 * V is a register holding the round constant, not a literal.
 * FN leaves its result in TMP1.  TMP3 and TMP4 are overwritten as
 * scratch; TMP2 (x), A, C and D are left unchanged.
 * FN is expanded without a separator after it, so each FN macro
 * must end with its own ';'.
 */
11*12b1df16SDavid du Colombier#define BODYX(off,FN,V,A,B,C,D,E)\
12*12b1df16SDavid du Colombier	FN(B,C,D)\
13*12b1df16SDavid du Colombier	ADDU TMP1,E;\
14*12b1df16SDavid du Colombier	ADDU V,E;\
15*12b1df16SDavid du Colombier	MOVW TMP2,off(WREG);\
16*12b1df16SDavid du Colombier	ADDU TMP2,E;\
17*12b1df16SDavid du Colombier	SLL $5,A,TMP3;\
18*12b1df16SDavid du Colombier	SRL $27,A,TMP4;\
19*12b1df16SDavid du Colombier	OR TMP3,TMP4;\
20*12b1df16SDavid du Colombier	ADDU TMP4,E;\
21*12b1df16SDavid du Colombier	SLL $30,B,TMP4;\
22*12b1df16SDavid du Colombier	SRL $2,B;\
23*12b1df16SDavid du Colombier	OR TMP4,B
24*12b1df16SDavid du Colombier
25*12b1df16SDavid du Colombier/*
 * BODY1(off,FN,V,A,B,C,D,E): a round whose message word comes
 * straight from the input data.
 *
 * Loads the four bytes at DATAREG+off .. DATAREG+off+3 one at a time
 * and combines them most-significant byte first:
 *	x = d[off]<<24 | d[off+1]<<16 | d[off+2]<<8 | d[off+3];
 * then runs BODYX with x in TMP2.  The same off is used both as the
 * byte offset into the data and as the byte offset into the schedule
 * at WREG.
 * Overwrites TMP1, TMP2, TMP3 and TMP4.
 */
29*12b1df16SDavid du Colombier#define BODY1(off,FN,V,A,B,C,D,E)\
30*12b1df16SDavid du Colombier	MOVBU off(DATAREG),TMP2;\
31*12b1df16SDavid du Colombier	MOVBU (off+1)(DATAREG),TMP3;\
32*12b1df16SDavid du Colombier	MOVBU (off+2)(DATAREG),TMP1;\
33*12b1df16SDavid du Colombier	MOVBU (off+3)(DATAREG),TMP4;\
34*12b1df16SDavid du Colombier	SLL $24,TMP2;\
35*12b1df16SDavid du Colombier	SLL $16,TMP3;\
36*12b1df16SDavid du Colombier	OR TMP3,TMP2;\
37*12b1df16SDavid du Colombier	SLL $8,TMP1;\
38*12b1df16SDavid du Colombier	OR TMP1,TMP2;\
39*12b1df16SDavid du Colombier	OR TMP4,TMP2;\
40*12b1df16SDavid du Colombier	BODYX(off,FN,V,A,B,C,D,E)
41*12b1df16SDavid du Colombier
42*12b1df16SDavid du Colombier/*
 * BODY(off,FN,V,A,B,C,D,E): a round whose message word is derived
 * from earlier schedule entries.
 *
 * With i the word index that corresponds to byte offset off:
 *	x = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1;
 * The loads use byte offsets from WREG: off-12, off-32, off-56 and
 * off-64.  BODYX then stores x at off(WREG) and finishes the round.
 * Overwrites TMP1, TMP2, TMP3 and TMP4.
 */
46*12b1df16SDavid du Colombier#define BODY(off,FN,V,A,B,C,D,E)\
47*12b1df16SDavid du Colombier	MOVW (off-64)(WREG),TMP1;\
48*12b1df16SDavid du Colombier	MOVW (off-56)(WREG),TMP2;\
49*12b1df16SDavid du Colombier	MOVW (off-32)(WREG),TMP3;\
50*12b1df16SDavid du Colombier	MOVW (off-12)(WREG),TMP4;\
51*12b1df16SDavid du Colombier	XOR TMP1,TMP2;\
52*12b1df16SDavid du Colombier	XOR TMP3,TMP2;\
53*12b1df16SDavid du Colombier	XOR TMP4,TMP2;\
54*12b1df16SDavid du Colombier	SLL $1,TMP2,TMP1;\
55*12b1df16SDavid du Colombier	SRL $31,TMP2;\
56*12b1df16SDavid du Colombier	OR TMP1,TMP2;\
57*12b1df16SDavid du Colombier	BODYX(off,FN,V,A,B,C,D,E)
58*12b1df16SDavid du Colombier
59*12b1df16SDavid du Colombier/*
 * FN1(B,C,D): TMP1 = ((C ^ D) & B) ^ D
 * Bit by bit this selects C where B is 1 and D where B is 0.
 * Only TMP1 is written; B, C and D are left unchanged.
 * The trailing ';' is required because BODYX expands FN with no
 * separator after it.
 */
62*12b1df16SDavid du Colombier#define FN1(B,C,D)\
63*12b1df16SDavid du Colombier	XOR C,D,TMP1;\
64*12b1df16SDavid du Colombier	AND B,TMP1;\
65*12b1df16SDavid du Colombier	XOR D,TMP1;
66*12b1df16SDavid du Colombier
67*12b1df16SDavid du Colombier/*
 * FN24(B,C,D): TMP1 = B ^ C ^ D
 * Used for both the second and the fourth group of rounds.
 * Only TMP1 is written; B, C and D are left unchanged.
 * The trailing ';' is required because BODYX expands FN with no
 * separator after it.
 */
70*12b1df16SDavid du Colombier#define FN24(B,C,D)\
71*12b1df16SDavid du Colombier	XOR B,C,TMP1;\
72*12b1df16SDavid du Colombier	XOR D,TMP1;
73*12b1df16SDavid du Colombier
74*12b1df16SDavid du Colombier/*
 * FN3(B,C,D): TMP1 = ((B ^ C) & (D ^ B)) ^ B
 * Bit by bit this is the majority of B, C and D.
 * Writes TMP1 (result) and TMP4 (scratch); B, C and D are left
 * unchanged.  BODYX only reuses TMP4 after FN has finished, so the
 * extra scratch register does not conflict.
 * The trailing ';' is required because BODYX expands FN with no
 * separator after it.
 */
77*12b1df16SDavid du Colombier#define FN3(B,C,D)\
78*12b1df16SDavid du Colombier	XOR B,C,TMP1;\
79*12b1df16SDavid du Colombier	XOR B,D,TMP4;\
80*12b1df16SDavid du Colombier	AND TMP4,TMP1;\
81*12b1df16SDavid du Colombier	XOR B,TMP1;
82*12b1df16SDavid du Colombier
83*12b1df16SDavid du Colombier/*
 * Argument offsets from FP for _sha1block.
 * The offsets place the data pointer first, then the length, then
 * the state pointer.
 *
 * LEN and STATE are loaded from the frame at the start of the
 * routine.  DATA is defined but never loaded in this file: the code
 * reads the data pointer directly from DATAREG.
 * NOTE(review): presumably the first argument arrives in that
 * register under the calling convention -- confirm.
 *
 * An earlier version of this comment gave the prototype as
 *	void vtSha1Block(ulong *STATE, uchar *DATA, int LEN)
 * which matches neither the TEXT symbol nor the offset order here.
 */
87*12b1df16SDavid du Colombier#define	DATA	0
88*12b1df16SDavid du Colombier#define	LEN	4
89*12b1df16SDavid du Colombier#define	STATE	8
90*12b1df16SDavid du Colombier
91*12b1df16SDavid du Colombier/*
 * Stack offset for the only local kept in memory:
 *	ulong w[80];	the message schedule, at warray+WARRAY(SP)
 * WARRAY evaluates to -324, i.e. 80 words plus 4 bytes.
 *
 * Everything else lives in registers, named by the aliases defined
 * after this macro:
 *	edata			end of the input data
 *	w15, w40, w60, w80	end markers inside w[]
 *	wp			current position in w[]
 *	a, b, c, d, e		working state
 *	tmp			scratch values
 * (An earlier version of this comment named x86 registers for
 * these; those names do not apply to this file.)
 */
101*12b1df16SDavid du Colombier#define WARRAY	(-4-(80*4))
102*12b1df16SDavid du Colombier
/*
 * Register aliases.
 */
/* working state words a..e */
103*12b1df16SDavid du Colombier#define	AREG		R5
104*12b1df16SDavid du Colombier#define BREG		R6
105*12b1df16SDavid du Colombier#define CREG		R7
106*12b1df16SDavid du Colombier#define DREG		R8
107*12b1df16SDavid du Colombier#define EREG		R9
/* DATAREG: current input position; STREG: pointer to the state words */
108*12b1df16SDavid du Colombier#define DATAREG		R1
109*12b1df16SDavid du Colombier#define STREG		R11
/* WREG: current position in w[]; WnnREG: address of w[nn], used as loop bounds */
110*12b1df16SDavid du Colombier#define WREG		R12
111*12b1df16SDavid du Colombier#define W15REG		R13
112*12b1df16SDavid du Colombier#define W60REG		R14
113*12b1df16SDavid du Colombier#define W40REG		R15
114*12b1df16SDavid du Colombier#define W80REG		R16
/* EDREG: end of input (data + len); VREG: constant added in the current group of rounds */
115*12b1df16SDavid du Colombier#define EDREG		R17
116*12b1df16SDavid du Colombier#define VREG		R18
117*12b1df16SDavid du Colombier
/* scratch; TMP1..TMP4 are overwritten by the round macros, TMP5 only by the final state update */
118*12b1df16SDavid du Colombier#define TMP1		R10
119*12b1df16SDavid du Colombier#define TMP2		R2
120*12b1df16SDavid du Colombier#define TMP3		R3
121*12b1df16SDavid du Colombier#define TMP4		R4
122*12b1df16SDavid du Colombier#define TMP5		R19
123*12b1df16SDavid du Colombier
/*
 * Routine body.  Processes the input 64 bytes at a time, updating the
 * five state words at STREG after every 64-byte block.
 */

/* fetch len and the state pointer from the frame; EDREG = data + len */
124*12b1df16SDavid du Colombier	MOVW len+LEN(FP),TMP1
125*12b1df16SDavid du Colombier	MOVW state+STATE(FP),STREG
126*12b1df16SDavid du Colombier	ADDU DATAREG,TMP1,EDREG
127*12b1df16SDavid du Colombier
/* load the current state into the working registers a..e */
128*12b1df16SDavid du Colombier	MOVW 0(STREG),AREG
129*12b1df16SDavid du Colombier	MOVW 4(STREG),BREG
130*12b1df16SDavid du Colombier	MOVW 8(STREG),CREG
131*12b1df16SDavid du Colombier	MOVW 12(STREG),DREG
132*12b1df16SDavid du Colombier	MOVW 16(STREG),EREG
133*12b1df16SDavid du Colombier
/* precompute the addresses of w[15], w[40], w[60] and w[80] used as loop bounds */
134*12b1df16SDavid du Colombier	MOVW $warray+WARRAY(SP),WREG
135*12b1df16SDavid du Colombier	ADDU $(15*4),WREG,W15REG
136*12b1df16SDavid du Colombier	ADDU $(40*4),WREG,W40REG
137*12b1df16SDavid du Colombier	ADDU $(60*4),WREG,W60REG
138*12b1df16SDavid du Colombier	ADDU $(80*4),WREG,W80REG
139*12b1df16SDavid du Colombier
/* one pass per 64-byte block: restart at w[0] */
140*12b1df16SDavid du Colombiermainloop:
141*12b1df16SDavid du Colombier	MOVW $warray+WARRAY(SP),WREG
142*12b1df16SDavid du Colombier
/*
 * Rounds 0..14: message words come from the input.  Five rounds per
 * pass; the a..e roles rotate through the macro argument order, so
 * after five rounds every register is back in its original role and
 * no register moves are needed.  Three passes bring WREG to w[15].
 */
143*12b1df16SDavid du Colombier	MOVW $0x5a827999,VREG
144*12b1df16SDavid du Colombierloop1:
145*12b1df16SDavid du Colombier	BODY1(0,FN1,VREG,AREG,BREG,CREG,DREG,EREG)
146*12b1df16SDavid du Colombier	BODY1(4,FN1,VREG,EREG,AREG,BREG,CREG,DREG)
147*12b1df16SDavid du Colombier	BODY1(8,FN1,VREG,DREG,EREG,AREG,BREG,CREG)
148*12b1df16SDavid du Colombier	BODY1(12,FN1,VREG,CREG,DREG,EREG,AREG,BREG)
149*12b1df16SDavid du Colombier	BODY1(16,FN1,VREG,BREG,CREG,DREG,EREG,AREG)
150*12b1df16SDavid du Colombier
151*12b1df16SDavid du Colombier	ADDU $20,DATAREG
152*12b1df16SDavid du Colombier	ADDU $20,WREG
153*12b1df16SDavid du Colombier	BNE WREG,W15REG,loop1
154*12b1df16SDavid du Colombier
/* round 15: the last word taken from the input; DATAREG has now advanced 64 bytes */
155*12b1df16SDavid du Colombier	BODY1(0,FN1,VREG,AREG,BREG,CREG,DREG,EREG)
156*12b1df16SDavid du Colombier	ADDU $4,DATAREG
157*12b1df16SDavid du Colombier
/* rounds 16..19: same function and constant, but words are now derived from w[] */
158*12b1df16SDavid du Colombier	BODY(4,FN1,VREG,EREG,AREG,BREG,CREG,DREG)
159*12b1df16SDavid du Colombier	BODY(8,FN1,VREG,DREG,EREG,AREG,BREG,CREG)
160*12b1df16SDavid du Colombier	BODY(12,FN1,VREG,CREG,DREG,EREG,AREG,BREG)
161*12b1df16SDavid du Colombier	BODY(16,FN1,VREG,BREG,CREG,DREG,EREG,AREG)
162*12b1df16SDavid du Colombier
/* WREG moves from w[15] to w[20] */
163*12b1df16SDavid du Colombier	ADDU $20,WREG
164*12b1df16SDavid du Colombier
/* rounds 20..39: FN24, until WREG reaches w[40] */
165*12b1df16SDavid du Colombier	MOVW $0x6ed9eba1,VREG
166*12b1df16SDavid du Colombierloop2:
167*12b1df16SDavid du Colombier	BODY(0,FN24,VREG,AREG,BREG,CREG,DREG,EREG)
168*12b1df16SDavid du Colombier	BODY(4,FN24,VREG,EREG,AREG,BREG,CREG,DREG)
169*12b1df16SDavid du Colombier	BODY(8,FN24,VREG,DREG,EREG,AREG,BREG,CREG)
170*12b1df16SDavid du Colombier	BODY(12,FN24,VREG,CREG,DREG,EREG,AREG,BREG)
171*12b1df16SDavid du Colombier	BODY(16,FN24,VREG,BREG,CREG,DREG,EREG,AREG)
172*12b1df16SDavid du Colombier
173*12b1df16SDavid du Colombier	ADDU $20,WREG
174*12b1df16SDavid du Colombier	BNE WREG,W40REG,loop2
175*12b1df16SDavid du Colombier
/* rounds 40..59: FN3, until WREG reaches w[60] */
176*12b1df16SDavid du Colombier	MOVW $0x8f1bbcdc,VREG
177*12b1df16SDavid du Colombierloop3:
178*12b1df16SDavid du Colombier	BODY(0,FN3,VREG,AREG,BREG,CREG,DREG,EREG)
179*12b1df16SDavid du Colombier	BODY(4,FN3,VREG,EREG,AREG,BREG,CREG,DREG)
180*12b1df16SDavid du Colombier	BODY(8,FN3,VREG,DREG,EREG,AREG,BREG,CREG)
181*12b1df16SDavid du Colombier	BODY(12,FN3,VREG,CREG,DREG,EREG,AREG,BREG)
182*12b1df16SDavid du Colombier	BODY(16,FN3,VREG,BREG,CREG,DREG,EREG,AREG)
183*12b1df16SDavid du Colombier
184*12b1df16SDavid du Colombier	ADDU $20,WREG
185*12b1df16SDavid du Colombier	BNE WREG,W60REG,loop3
186*12b1df16SDavid du Colombier
/* rounds 60..79: FN24 again, until WREG reaches w[80] */
187*12b1df16SDavid du Colombier	MOVW $0xca62c1d6,VREG
188*12b1df16SDavid du Colombierloop4:
189*12b1df16SDavid du Colombier	BODY(0,FN24,VREG,AREG,BREG,CREG,DREG,EREG)
190*12b1df16SDavid du Colombier	BODY(4,FN24,VREG,EREG,AREG,BREG,CREG,DREG)
191*12b1df16SDavid du Colombier	BODY(8,FN24,VREG,DREG,EREG,AREG,BREG,CREG)
192*12b1df16SDavid du Colombier	BODY(12,FN24,VREG,CREG,DREG,EREG,AREG,BREG)
193*12b1df16SDavid du Colombier	BODY(16,FN24,VREG,BREG,CREG,DREG,EREG,AREG)
194*12b1df16SDavid du Colombier
195*12b1df16SDavid du Colombier	ADDU $20,WREG
196*12b1df16SDavid du Colombier	BNE WREG,W80REG,loop4
197*12b1df16SDavid du Colombier
/* reload the state as it was before this block */
198*12b1df16SDavid du Colombier	MOVW 0(STREG),TMP1
199*12b1df16SDavid du Colombier	MOVW 4(STREG),TMP2
200*12b1df16SDavid du Colombier	MOVW 8(STREG),TMP3
201*12b1df16SDavid du Colombier	MOVW 12(STREG),TMP4
202*12b1df16SDavid du Colombier	MOVW 16(STREG),TMP5
203*12b1df16SDavid du Colombier
/* add it to the working registers ... */
204*12b1df16SDavid du Colombier	ADDU TMP1,AREG
205*12b1df16SDavid du Colombier	ADDU TMP2,BREG
206*12b1df16SDavid du Colombier	ADDU TMP3,CREG
207*12b1df16SDavid du Colombier	ADDU TMP4,DREG
208*12b1df16SDavid du Colombier	ADDU TMP5,EREG
209*12b1df16SDavid du Colombier
/* ... and store the sums back; a..e already hold the starting values for the next block */
210*12b1df16SDavid du Colombier	MOVW AREG,0(STREG)
211*12b1df16SDavid du Colombier	MOVW BREG,4(STREG)
212*12b1df16SDavid du Colombier	MOVW CREG,8(STREG)
213*12b1df16SDavid du Colombier	MOVW DREG,12(STREG)
214*12b1df16SDavid du Colombier	MOVW EREG,16(STREG)
215*12b1df16SDavid du Colombier
/*
 * Continue until the input pointer equals data + len.
 * NOTE(review): this is an equality test made after a full block has
 * been consumed, and DATAREG advances by exactly 64 per pass, so the
 * loop only stops at EDREG when len is a positive multiple of 64.
 * Confirm that every caller guarantees that.
 */
216*12b1df16SDavid du Colombier	BNE DATAREG,EDREG,mainloop
217*12b1df16SDavid du Colombier
218*12b1df16SDavid du Colombier	RET
219*12b1df16SDavid du Colombier
220*12b1df16SDavid du Colombier	END
221