#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy the lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code a MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...
#
# The code is somewhat IRIX-centric, i.e. it is likely to require minor
# adaptations for other OSes...
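
# As an illustration of the unaligned-load trick (assuming big-endian
# byte order, where $MSB=0 and $LSB=3):
#
#	lwl	$t,0($inp)	# fill in the high-order bytes
#	lwr	$t,3($inp)	# fill in the low-order bytes
#
# together read one 32-bit word from an arbitrarily aligned address
# without taking an alignment trap.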

for (@ARGV) {   $big_endian=1 if (/\-DB_ENDIAN/);
                $big_endian=0 if (/\-DL_ENDIAN/);   }
if (!defined($big_endian))
            {   $big_endian=(unpack('L',pack('N',1))==1);   }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;
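# e.g. big-endian gives $MSB=0,$LSB=3; little-endian gives $MSB=3,$LSB=0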
29
30@X=(	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15",
31	"\$16",	"\$17",	"\$18",	"\$19",	"\$20",	"\$21",	"\$22",	"\$23");
32$ctx="\$4";	# a0
33$inp="\$5";	# a1
34$num="\$6";	# a2
35$A="\$1";
36$B="\$2";
37$C="\$3";
38$D="\$7";
39$E="\$24";	@V=($A,$B,$C,$D,$E);
40$t0="\$25";	# jp,t9
41$t1="\$28";	# gp
42$t2="\$30";	# fp,s8
43$K="\$31";	# ra
44
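# 16 SZREG-sized stack slots: 11 are used for the saved registers
# below, the remainder presumably rounds the frame up for alignment.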
$FRAMESIZE=16;
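# Rounds 0..14: F(b,c,d) = Ch = (b & (c ^ d)) ^ d with K = 0x5a827999.
# Each round pre-loads the next input word X[j] via lwl/lwr and, on
# little-endian machines, byte-swaps the current word first.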
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
$code.=<<___;
	 lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
___
}

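# Rounds 15..19: same F and K, but the message schedule switches to
# the Xupdate recurrence X[j] = rol(X[j-3]^X[j-8]^X[j-14]^X[j-16],1),
# kept in a 16-word circular buffer; hence the (j+2,j+8,j+13)%16
# indices and the srl/addu/or triplet performing the rotate by 1.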
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
$code.=<<___;
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

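# Rounds 20..39 and 60..79: F(b,c,d) = Parity = b ^ c ^ d, with
# K = 0x6ed9eba1 and 0xca62c1d6 respectively; the $i==79 variant
# overlaps the last round with reloading the context words.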
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	 or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
___
$code.=<<___ if ($i==79);
	 lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	 lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	 lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	 lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

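# Rounds 40..59: F(b,c,d) = Maj = (b & c) | (b & d) | (c & d) with
# K = 0x8f1bbcdc, computed here as (c & d) + (b & (c ^ d)): the two
# terms can never have a bit in common, so plain addu's replace the or's.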
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	 srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	 addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

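# The prologue/epilogue below stays ABI-neutral through the PTR_*,
# REG_* and SZREG macros picked up from the system <asm.h>; the .mask
# word 0xd0ff0000 marks $31, $30, $28 and $23..$16 as saved. Note that
# under ".set noreorder" the instruction following a branch executes
# in its delay slot (e.g. the "lw $E,16($ctx)" after "b .Loop").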
$code=<<___;
#include <asm.h>
#include <regdef.h>

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	sp,$FRAMESIZE*SZREG,zero
	.mask	0xd0ff0000,-$FRAMESIZE*SZREG
	.set	noreorder
	PTR_SUB	sp,$FRAMESIZE*SZREG
	REG_S	\$31,($FRAMESIZE-1)*SZREG(sp)
	REG_S	\$30,($FRAMESIZE-2)*SZREG(sp)
	REG_S	\$28,($FRAMESIZE-3)*SZREG(sp)
	REG_S	\$23,($FRAMESIZE-4)*SZREG(sp)
	REG_S	\$22,($FRAMESIZE-5)*SZREG(sp)
	REG_S	\$21,($FRAMESIZE-6)*SZREG(sp)
	REG_S	\$20,($FRAMESIZE-7)*SZREG(sp)
	REG_S	\$19,($FRAMESIZE-8)*SZREG(sp)
	REG_S	\$18,($FRAMESIZE-9)*SZREG(sp)
	REG_S	\$17,($FRAMESIZE-10)*SZREG(sp)
	REG_S	\$16,($FRAMESIZE-11)*SZREG(sp)

	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
___
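# All 80 rounds are emitted fully unrolled; rotating @V with
# unshift(@V,pop(@V)) renames the working variables from round to
# round instead of shuffling actual registers.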
for ($i=0;$i<15;$i++)	{ &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	PTR_SUB	$num,1
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bnez	$num,.Loop
	PTR_ADD	$inp,64

	.set	noreorder
	REG_L	\$31,($FRAMESIZE-1)*SZREG(sp)
	REG_L	\$30,($FRAMESIZE-2)*SZREG(sp)
	REG_L	\$28,($FRAMESIZE-3)*SZREG(sp)
	REG_L	\$23,($FRAMESIZE-4)*SZREG(sp)
	REG_L	\$22,($FRAMESIZE-5)*SZREG(sp)
	REG_L	\$21,($FRAMESIZE-6)*SZREG(sp)
	REG_L	\$20,($FRAMESIZE-7)*SZREG(sp)
	REG_L	\$19,($FRAMESIZE-8)*SZREG(sp)
	REG_L	\$18,($FRAMESIZE-9)*SZREG(sp)
	REG_L	\$17,($FRAMESIZE-10)*SZREG(sp)
	REG_L	\$16,($FRAMESIZE-11)*SZREG(sp)
	jr	ra
	PTR_ADD	sp,$FRAMESIZE*SZREG
.end	sha1_block_data_order
___
print $code;
close STDOUT;