xref: /plan9-contrib/sys/src/libsec/amd64/sha1block.s (revision 42bf527c5f37ca8e61cd3940c8ea66d190badf15)
/*
 * BSWAPDI: BSWAP of DI, emitted as raw opcode bytes (0x0f 0xcf).
 */
#define BSWAPDI	BYTE $0x0f; BYTE $0xcf;

/*
 * BODY(woff,F,K,a,b,c,d,e): one round whose schedule word is derived
 * from four earlier words of w[].  BP points into w[], woff is a byte
 * offset from BP, DI is scratch and is left holding F(b,c,d).
 * With i = woff/4 and wp = (ulong*)BP:
 *
 *	x = (wp[i-3] ^ wp[i-8] ^ wp[i-14] ^ wp[i-16]) <<< 1;
 *	wp[i] = x;
 *	e += K + x;
 *	e += a <<< 5;
 *	e += F(b,c,d);
 *	b = b >>> 2;
 *
 * F is invoked without a separator, so it must end in its own ';'.
 * e has to be a register since it is used as an index in LEAL.
 */
#define BODY(woff,F,K,a,b,c,d,e)\
	MOVL (woff-12)(BP),DI;\
	XORL (woff-32)(BP),DI;\
	XORL (woff-56)(BP),DI;\
	XORL (woff-64)(BP),DI;\
	ROLL $1,DI;\
	MOVL DI,woff(BP);\
	LEAL K(DI)(e*1),e;\
	MOVL a,DI;\
	ROLL $5,DI;\
	ADDL DI,e;\
	F(b,c,d)\
	ADDL DI,e;\
	RORL $2,b;
25
/*
 * BODY0(doff,F,K,a,b,c,d,e): one round whose schedule word is read
 * directly from the input.  BX points at the input, BP into w[], and
 * doff is the same byte offset from both.  DI is scratch and is left
 * holding F(b,c,d).
 *
 *	x = byte-swapped 32-bit word at doff(BX);
 *	word at doff(BP) = x;
 *	e += K + x;
 *	e += a <<< 5;
 *	e += F(b,c,d);
 *	b = b >>> 2;
 *
 * F is invoked without a separator, so it must end in its own ';'.
 * e has to be a register since it is used as an index in LEAL.
 */
#define BODY0(doff,F,K,a,b,c,d,e)\
	MOVLQZX doff(BX),DI;\
	BSWAPDI;\
	LEAL K(DI)(e*1),e;\
	MOVL DI,doff(BP);\
	MOVL a,DI;\
	ROLL $5,DI;\
	ADDL DI,e;\
	F(b,c,d)\
	ADDL DI,e;\
	RORL $2,b;
37
/*
 * FN1(b,c,d): DI = ((c ^ d) & b) ^ d
 * i.e. each result bit is taken from c where b is 1 and from d where
 * b is 0.  b, c and d are only read.
 */
#define FN1(b,c,d)\
	MOVL d,DI;\
	XORL c,DI;\
	ANDL b,DI;\
	XORL d,DI;
46
/*
 * FN24(b,c,d): DI = b ^ c ^ d
 * b, c and d are only read.
 */
#define FN24(b,c,d)\
	MOVL d,DI;\
	XORL c,DI;\
	XORL b,DI;
54
/*
 * FN3(b,c,d): DI = ((b ^ c) & (d ^ b)) ^ b
 * i.e. b where b and c agree, d where they differ (bitwise majority).
 * d is used as a temporary: it is XORed with b before the AND and
 * XORed with b again afterwards, so it leaves with its original value.
 */
#define FN3(b,c,d)\
	XORL b,d;\
	MOVL b,DI;\
	XORL c,DI;\
	ANDL d,DI;\
	XORL b,DI;\
	XORL b,d;
66
/*
 * argument offsets from FP, one 8-byte slot per argument
 * void sha1block(uchar *DATA, int LEN, ulong *STATE)
 */
#define	DATA	(0*8)
#define	LEN	(1*8)
#define	STATE	(2*8)
74
/*
 * locals, as negative offsets from SP
 *
 *	WARRAY		ulong w[80]	(80*4 bytes)
 *	TMP1, TMP2	spill slots for working variables
 *	W15 .. W80	ulong *w15, *w40, *w60, *w80	(loop limits in w[])
 *	EDATA		uchar *edata	(end of input)
 *
 * Each slot below w[] is 8 bytes; EDATA is the lowest at -64-(80*4).
 *
 * registers
 *	Rpdata (R8)	input pointer
 *	BP		ulong *wp
 *	AX,BX,CX,DX,SI	a, b, c, d, e
 *	DI		tmp
 */
#define	Rpdata	R8
#define WARRAY	(-8-(80*4))
#define TMP1	(WARRAY-8)
#define TMP2	(WARRAY-16)
#define W15	(WARRAY-24)
#define W40	(WARRAY-32)
#define W60	(WARRAY-40)
#define W80	(WARRAY-48)
#define EDATA	(WARRAY-56)
94
/*
 * void _sha1block(uchar *data, int len, ulong *state)
 *
 * Runs the 80 SHA-1 rounds over each 64-byte block of data[0..len)
 * and adds the resulting a,b,c,d,e into state[0..4].
 *
 * data is taken from RARG; len and state are read from the frame.
 * Input is consumed 64 bytes at a time while Rpdata < edata, so len
 * is expected to be a multiple of 64: a trailing partial block would
 * be read as a full one.  With len == 0 nothing is read or written.
 *
 * Working variables: a=AX, b=BX, c=CX, d=DX, e=SI, scratch=DI,
 * wp=BP.  During the first 16 rounds BX is the input cursor, so the
 * variable that would sit in BX lives in the tmp1/tmp2 stack slots.
 */
TEXT	_sha1block+0(SB),$384

	/* Rpdata = data; edata = data + len */
	MOVQ RARG, Rpdata
	MOVLQZX len+LEN(FP),BX
	ADDQ BX, RARG
	MOVQ RARG,edata+EDATA(SP)

	/* loop limits: &w[15], &w[40], &w[60], &w[80] */
	LEAQ aw15+(WARRAY+15*4)(SP),DI
	MOVQ DI,w15+W15(SP)
	LEAQ aw40+(WARRAY+40*4)(SP),DX
	MOVQ DX,w40+W40(SP)
	LEAQ aw60+(WARRAY+60*4)(SP),CX
	MOVQ CX,w60+W60(SP)
	LEAQ aw80+(WARRAY+80*4)(SP),DI
	MOVQ DI,w80+W80(SP)

	/* test before the first block so that len == 0 touches nothing */
	JMP checkend

mainloop:
	LEAQ warray+WARRAY(SP),BP

	/* load a..e from state; b goes to tmp1 because BX is needed below */
	MOVQ state+STATE(FP),DI
	MOVL (DI),AX
	MOVL 4(DI),BX
	MOVL BX,tmp1+TMP1(SP)
	MOVL 8(DI),CX
	MOVL 12(DI),DX
	MOVL 16(DI),SI

	MOVQ Rpdata,BX

	/* w[0..14]: five rounds per pass, words read from the input */
loop1:
	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
	MOVL SI,tmp2+TMP2(SP)
	BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX)
	MOVL tmp1+TMP1(SP),SI
	BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX)
	BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI)
	MOVL SI,tmp1+TMP1(SP)
	BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX)
	MOVL tmp2+TMP2(SP),SI

	ADDQ $20,BX
	ADDQ $20,BP
	CMPQ BP,w15+W15(SP)
	JCS loop1

	/* w[15]: last word taken from the input; BX then points at the next block */
	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
	ADDQ $4,BX
	MOVQ BX,Rpdata
	/* tmp1 is a 32-bit value: reload it with a 32-bit move */
	MOVL tmp1+TMP1(SP),BX

	/* w[16..19]: BP is still &w[15], hence offsets 4..16 */
	BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX)
	BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX)
	BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX)
	BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX)

	ADDQ $20,BP

	/* w[20..39] */
loop2:
	BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI)
	BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX)
	BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX)
	BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX)
	BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w40+W40(SP)
	JCS loop2

	/* w[40..59] */
loop3:
	BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI)
	BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX)
	BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX)
	BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX)
	BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w60+W60(SP)
	JCS loop3

	/* w[60..79] */
loop4:
	BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI)
	BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX)
	BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX)
	BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX)
	BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w80+W80(SP)
	JCS loop4

	/* state[0..4] += a..e */
	MOVQ state+STATE(FP),DI
	ADDL AX,0(DI)
	ADDL BX,4(DI)
	ADDL CX,8(DI)
	ADDL DX,12(DI)
	ADDL SI,16(DI)

	/* another block while Rpdata < edata */
checkend:
	MOVQ edata+EDATA(SP),DI
	CMPQ Rpdata,DI
	JCS mainloop

	RET
	END
198