#!/usr/bin/env perl
# xref: /openbsd-src/lib/libcrypto/sha/asm/sha1-parisc.pl (revision 676d1ceb597ab4ef4e34622c4c77334e7abfd175)

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> [<output>]
#   flavour - ABI selector; a value containing "64" selects the 64-bit
#             parameter set chosen further below, anything else the
#             32-bit one.
#   output  - name of the file that receives the generated assembly.
$flavour = shift;
$output = shift;

# Redirect STDOUT to the requested file.  The three-argument form keeps the
# file name from being parsed for mode characters, and a failure to create
# the file aborts the run instead of being silently ignored.  When no output
# name is given the open is skipped and the code is written to the STDOUT
# this script was started with.
if (defined($output) && length($output)) {
	open(STDOUT, ">", $output) or die "can't open $output: $!";
}

# ABI-dependent parameters.  A flavour containing "64" selects the 64-bit
# settings; anything else (including no flavour at all) selects the 32-bit
# ones.
#   $LEVEL          value emitted in the .LEVEL directive
#   $SIZE_T         size in bytes of one slot in the register save area
#   $FRAME_MARKER   bytes added to the frame for the frame marker
#   $SAVED_RP       distance below the entry %sp at which %r2 is saved
#   $PUSH / $POP    store / load mnemonics used for the register save area
#   $PUSHMA/$POPMB  store / load mnemonics applied to %r3 with the frame
#                   size as displacement (first save / last restore)
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				#                 [+ argument transfer]

# Incoming arguments of sha1_block_data_order.
$ctx="%r26";		# arg0
$inp="%r25";		# arg1
$num="%r24";		# arg2

# Scratch registers and the register holding the current round constant.
$t0="%r28";
$t1="%r29";
$K="%r31";

# Message schedule: X[0..15] is used as a 16-word circular buffer.  The 17th
# entry aliases $t0 and only serves as the extra word needed while aligning
# unaligned input.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# The five working variables.  @V is rotated after every round by the main
# code, $A..$E keep naming the registers in their initial order.
@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");

# BODY_00_19($i,$a,$b,$c,$d,$e)
#
# Appends the instructions for round $i (0..19) to the global $code.
#   $i          round number; also printed as a comment on the first line
#   $a..$e      names of the registers holding the five working variables
# Uses the file-level globals $K (round constant register), $t0/$t1
# (scratch registers) and @X (message schedule registers).
#
# Every round computes
#   e += K + X[i%16] + rol(a,5) + ((c & b) | (d & ~b));  b = rol(b,30)
# where the rotations are written as "shd r,r,32-n,..." right shifts of the
# register paired with itself.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
# Rounds 0..14: the schedule words are the loaded input, nothing to update.
$code.=<<___ if ($i<15);
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
# Rounds 15..19: same round function, interleaved with the computation of
# the schedule word for round $j=$i+1:
#   X[j%16] = rol(X[j%16] ^ X[(j+2)%16] ^ X[(j+8)%16] ^ X[(j+13)%16], 1)
$code.=<<___ if ($i>=15);	# with forward Xupdate
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends the instructions for one round with the parity function to the
# global $code.  The main code calls it for rounds 20..39 and again for
# rounds 60..79.
#   $i          round number; also printed as a comment on the first line
#   $a..$e      names of the registers holding the five working variables
# Uses the file-level globals $K, $t0, $t1, @X and, for round 79, $ctx.
#
# Every round computes
#   e += K + X[i%16] + rol(a,5) + (b ^ c ^ d);  b = rol(b,30)
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
# All rounds but the last also prepare the schedule word for round $j=$i+1:
#   X[j%16] = rol(X[j%16] ^ X[(j+2)%16] ^ X[(j+8)%16] ^ X[(j+13)%16], 1)
$code.=<<___ if ($i<79);
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
# Round 79 needs no further schedule word.  The freed slots are used to
# load the five context words from 0..16($ctx) into X[0..4] instead; the
# main code adds them to the working variables right after this round.
$code.=<<___ if ($i==79);	# with context load
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends the instructions for round $i (40..59) to the global $code.
#   $i          round number; also printed as a comment on the first line
#   $a..$e      names of the registers holding the five working variables
# Uses the file-level globals $K, $t0, $t1 and @X.
#
# Every round computes
#   e += K + X[i%16] + rol(a,5) + (b & (c ^ d)) + (c & d);  b = rol(b,30)
# The two terms (b & (c ^ d)) and (c & d) never have the same bit set, so
# adding them separately yields the majority function of b, c and d.  The
# "and $b,..." is emitted before b is rotated, so it sees the unrotated b.
#
# Interleaved with that is the schedule word for round $j=$i+1:
#   X[j%16] = rol(X[j%16] ^ X[(j+2)%16] ^ X[(j+8)%16] ^ X[(j+13)%16], 1)
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
}

# Build the assembly text for sha1_block_data_order in $code.  Expressions
# enclosed in backticks are left in the text here and evaluated by the
# post-processing step at the end of the script.
#
# First part: directives, procedure entry and prologue (saves %r2 and
# %r3..%r16), load of the five state words A..E from the context, and the
# shift amount for unaligned input, %sar = 32 - 8*(inp & 3).  At L$oop, $t0
# is set to inp with its two low bits cleared, i.e. the word-aligned address
# at or just below inp.
$code=<<___;
	.LEVEL	$LEVEL
	.text

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___
# Load words 0..14 of the block from the aligned address in $t0.
for ($i=0;$i<15;$i++) {		# load input block
	$code.="\tldw	`4*$i`($t0),@X[$i]\n";
}
# If inp was already aligned (inp == t0) skip the realignment.  The load of
# word 15 follows the branch directly, in its delay slot.  Otherwise a 17th
# word is fetched into X[16], which is $t0 itself, so this has to be the
# last load that uses $t0 as base.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___
# Merge each pair of neighbouring words into X[i], shifting by %sar.
for ($i=0;$i<16;$i++) {		# align input
	$code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";
}
# Round constant for rounds 0..19: 0x5a827000+0x999 = 0x5a827999.
$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
# Emit the 80 rounds.  After each round the register names in @V are rotated
# by one position, so the register just used as "e" becomes the next round's
# "a".  $i deliberately carries over from one loop to the next.
for ($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
# Round constant for rounds 20..39: 0x6ed9eba1.
$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for (;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Round constant for rounds 40..59: 0x8f1bbcdc.
$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for (;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
# Round constant for rounds 60..79: 0xca62c1d6.
$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for (;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# 80 rotations of the five-element @V bring it back to its initial order, so
# $A..$E again name the registers holding a..e.  X[0..4] hold the context
# words loaded during round 79; add them in, store the new state, and loop
# while --num != 0.  inp is advanced by 64 in the delay slot of the loop
# branch.  The epilogue restores %r2 and %r4..%r16, then returns through %r2
# with the restore of %r3 placed after the return branch.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<> -1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
___

# Post-process the generated text and write it out.
#  1. Replace every `...` span with the value of the Perl expression inside
#     it (frame offsets, load displacements).
#  2. 32-bit build: drop the ",*" completer marker, turning e.g. "cmpb,*="
#     into "cmpb,=".
#  3. 64-bit build: use "bve" in place of "bv".
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/,\*/,/gm		if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm	if ($SIZE_T==8);

# Check both the write and the close: buffered output may only fail when it
# is flushed at close time, and an unnoticed failure would leave a truncated
# assembly file behind.
print $code or die "error writing to STDOUT: $!";
close STDOUT or die "error closing STDOUT: $!";