xref: /openbsd-src/sys/arch/hppa/spmath/impyu.S (revision fe25cd0b3859548b9ec5895ae546bdf753e2d044)
1*fe25cd0bSderaadt/*	$OpenBSD: impyu.S,v 1.12 2011/04/16 20:52:12 deraadt Exp $	*/
28a472b3eSmickey/*
3c2feb252Smickey  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
4c2feb252Smickey  To anyone who acknowledges that this file is provided "AS IS"
5c2feb252Smickey  without any express or implied warranty:
6c2feb252Smickey      permission to use, copy, modify, and distribute this file
7c2feb252Smickey  for any purpose is hereby granted without fee, provided that
8c2feb252Smickey  the above copyright notice and this notice appears in all
9c2feb252Smickey  copies, and that the name of Hewlett-Packard Company not be
10c2feb252Smickey  used in advertising or publicity pertaining to distribution
11c2feb252Smickey  of the software without specific, written prior permission.
12c2feb252Smickey  Hewlett-Packard Company makes no representations about the
13c2feb252Smickey  suitability of this software for any purpose.
148a472b3eSmickey*/
15c2feb252Smickey/* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */
168a472b3eSmickey
178a472b3eSmickey#include <machine/asm.h>
1838f50ff7Smickey#include <machine/frame.h>
198a472b3eSmickey
20c2feb252Smickey;****************************************************************************
21c2feb252Smickey;
22c2feb252Smickey;Implement an integer multiply routine for 32-bit operands and 64-bit product
23c2feb252Smickey; with operand values of zero (multiplicand only) and 2**31 treated specially.
24c2feb252Smickey; The algorithm uses the multiplier, four bits at a time, from right to left,
25c2feb252Smickey; to generate partial product.  Execution speed is more important than program
26c2feb252Smickey; size in this implementation.
27c2feb252Smickey;
28c2feb252Smickey;******************************************************************************
29b94afd46Smickey;
30b94afd46Smickey; Definitions - General registers
31b94afd46Smickey;
32ba8556a4Smickeygr0	.reg		%r0		; General register zero
33ba8556a4Smickeypu	.reg		%r3		; upper part of product
34ba8556a4Smickeypl	.reg		%r4		; lower part of product (same register as op2)
35ba8556a4Smickeyop2	.reg		%r4		; multiplier (same register as pl)
36ba8556a4Smickeyop1	.reg		%r5		; multiplicand
37ba8556a4Smickeycnt	.reg		%r6		; count in multiply (same register as temp)
38ba8556a4Smickeybrindex	.reg		%r7		; index into the br. table
39ba8556a4Smickeysaveop2	.reg		%r8		; save op2 if high bit of multiplicand
40b94afd46Smickey					; is set
41ba8556a4Smickeypc	.reg		%r9		; carry bit of product, = 00...01
42ba8556a4Smickeypm	.reg		%r10		; value of -1 used in shifting
43ba8556a4Smickeytemp	.reg		%r6		; scratch for the final add (same register as cnt)
448a472b3eSmickey
45c2feb252Smickey;****************************************************************************
46c2feb252Smickey	.text
477eec34daSmickeyLEAF_ENTRY(u_xmpy)
;
; u_xmpy - 32 x 32 -> 64 bit integer multiply (unsigned, going by the
;	   u_ prefix and the handling of the multiplicand's msb below)
;
;   arg0	address of the 32-bit multiplicand (loaded into op1)
;   arg1	address of the 32-bit multiplier (loaded into op2)
;   arg2	address of the result: the high word is stored at offset 0,
;		the low word at offset 4
;
; The eight working registers (pu, pl, op1, cnt, brindex, saveop2, pc, pm)
; are saved on the stack on entry and restored in reverse order on exit.
;
; The multiplier is consumed four bits at a time through the branch table
; at mtable.  Several handlers are entered at label+4 or label-4, and many
; instructions sit in the delay slot of the branch before them, so the
; order and number of instructions must not be changed.
;
484f23d96fSmickey	stws,ma		pu,4(sp)		; save registers on stack
49b94afd46Smickey	stws,ma		pl,4(sp)		; save registers on stack
50b94afd46Smickey	stws,ma		op1,4(sp)		; save registers on stack
51b94afd46Smickey	stws,ma		cnt,4(sp)		; save registers on stack
52b94afd46Smickey	stws,ma		brindex,4(sp)		; save registers on stack
53b94afd46Smickey	stws,ma		saveop2,4(sp)		; save registers on stack
54b94afd46Smickey	stws,ma		pc,4(sp)		; save registers on stack
55b94afd46Smickey	stws,ma		pm,4(sp)		; save registers on stack
56b94afd46Smickey;
57b94afd46Smickey;   Start multiply process
58b94afd46Smickey;
59b94afd46Smickey	ldws		0(arg0),op1		; get multiplicand
60b94afd46Smickey	ldws		0(arg1),op2		; get multiplier
61b94afd46Smickey	addib,=		0,op1,fini0		; op1 = 0, product = 0
62b94afd46Smickey	addi		0,gr0,pu		; clear product
63b94afd46Smickey	bb,>=		op1,0,mpy1		; test msb of multiplicand
64b94afd46Smickey	addi		0,gr0,saveop2		; clear saveop2
65b94afd46Smickey;
66b94afd46Smickey; msb of multiplicand is set so will save multiplier for a final
67b94afd46Smickey; addition into the result
68b94afd46Smickey;
69b94afd46Smickey	extru,=		op1,31,31,op1		; clear msb of multiplicand
70b94afd46Smickey	b		mpy1			; if rest of op1 <> 0, start multiply
71b94afd46Smickey	add		op2,gr0,saveop2		;   save op2 in saveop2
;
; remaining bits of the multiplicand are zero, i.e. it was exactly 2**31:
; the product is simply op2 shifted left 31 bits
;
72b94afd46Smickey	shd		gr0,op2,1,pu		; shift op2 left 31 for result
73b94afd46Smickey	b		fini			; go to finish
748a472b3eSmickey	shd		op2,gr0,1,pl		;   low word = lsb of op2 in bit 0
75b94afd46Smickey;
76b94afd46Smickeympy1	addi		-1,gr0,pm		; initialize pm to 111...1
77b94afd46Smickey	addi		1,gr0,pc		; initialize pc to 00...01
78b94afd46Smickey	movib,tr	8,cnt,mloop		; set count for mpy loop
79b94afd46Smickey	extru		op2,31,4,brindex	; 4 bits as index into table
80b94afd46Smickey;
818a472b3eSmickey	.align		8			; 8-byte alignment
82b94afd46Smickey;
83b94afd46Smickey	b		sh4c			; br. if sign overflow
84b94afd46Smickeysh4n	shd		pu,pl,4,pl		; shift product right 4 bits
85b94afd46Smickey	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
86b94afd46Smickey	extru		pu,27,28,pu		;   <= zero
87b94afd46Smickey;
88b94afd46Smickeymloop	blr		brindex,gr0		; br. into table
89b94afd46Smickey						;   entries of 2 words
90b94afd46Smickey	extru		op2,27,4,brindex	; next 4 bits into index
91b94afd46Smickey;
92b94afd46Smickey;
93b94afd46Smickey;	branch table for the multiplication process with four multiplier bits
94b94afd46Smickey;
95b94afd46Smickeymtable						; two words per entry
96b94afd46Smickey;
97b94afd46Smickey; ----	bits = 0000 ---- shift product 4 bits -------------------------------
98b94afd46Smickey;
99b94afd46Smickey	b		sh4n+4			; just shift partial
100b94afd46Smickey	shd		pu,pl,4,pl		;   product right 4 bits
101b94afd46Smickey;
102b94afd46Smickey;  ----	bits = 0001 ---- add op1, then shift 4 bits
103b94afd46Smickey;
104b94afd46Smickey	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
105b94afd46Smickey	shd		pu,pl,4,pl		;   product right 4 bits
106b94afd46Smickey;
107b94afd46Smickey;  ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
108b94afd46Smickey;
109b94afd46Smickey	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
110b94afd46Smickey	addb,uv		op1,pu,sh4c		;   product right 4 bits
111b94afd46Smickey;
112b94afd46Smickey;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
113b94afd46Smickey;
114b94afd46Smickey	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
115b94afd46Smickey	sh1add,nuv	op1,pu,pu			;   product right 4 bits
116b94afd46Smickey;
117b94afd46Smickey;  ----	bits = 0100 ---- shift 2, add op1, shift 2
118b94afd46Smickey;
1198a472b3eSmickey	b		sh2sa			; to sh2sa: shift pu, add op1, shift 2
120b94afd46Smickey	shd		pu,pl,2,pl		; shift product 2 bits
121b94afd46Smickey;
122b94afd46Smickey;  ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
123b94afd46Smickey;
124b94afd46Smickey	addb,tr		op1,pu,sh2us		; add op1 to product
125b94afd46Smickey	shd		pu,pl,2,pl		; shift 2 bits
126b94afd46Smickey;
127b94afd46Smickey;  ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
128b94afd46Smickey;
129b94afd46Smickey	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
130b94afd46Smickey	addb,nuv	op1,pu,sh2us		; br. if not overflow
131b94afd46Smickey;
132b94afd46Smickey;  ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
133b94afd46Smickey;
1348a472b3eSmickey	b		sh3s			; to sh3s: shift 3, add op1, shift 1
135b94afd46Smickey	sub		pu,op1,pu		; subtract op1, br. to sh3s
1368a472b3eSmickey
137b94afd46Smickey;
138b94afd46Smickey;  ----	bits = 1000 ---- shift 3, add op1, shift 1
139b94afd46Smickey;
1408a472b3eSmickey	b		sh3sa			; to sh3sa: shift pu, add op1, shift 1
141b94afd46Smickey	shd		pu,pl,3,pl		; shift product right 3 bits
142b94afd46Smickey;
143b94afd46Smickey;  ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
144b94afd46Smickey;
145b94afd46Smickey	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
146b94afd46Smickey	shd		pu,pl,3,pl		;   and shift 1
147b94afd46Smickey;
148b94afd46Smickey;  ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
149b94afd46Smickey;
150b94afd46Smickey	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
151b94afd46Smickey	addb,nuv	op1,pu,sh3us		;   br. if no overflow
152b94afd46Smickey;
153b94afd46Smickey;  ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
154b94afd46Smickey;
155b94afd46Smickey	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
156b94afd46Smickey	sub		pu,op1,pu		;   shift 2 with minus sign
157b94afd46Smickey;
158b94afd46Smickey;  ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
159b94afd46Smickey;
160b94afd46Smickey	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
161b94afd46Smickey	shd		pu,pl,2,pl		; shift right 2 bits signed
162b94afd46Smickey;
163b94afd46Smickey;  ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
164b94afd46Smickey;
165b94afd46Smickey	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
166b94afd46Smickey	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
167b94afd46Smickey;
168b94afd46Smickey;  ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
169b94afd46Smickey;
170b94afd46Smickey	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
171b94afd46Smickey	shd		pu,pl,1,pl		; shift 1 bit
172b94afd46Smickey;
173b94afd46Smickey;  ----	bits = 1111 ---- add -op1, shift 4 signed
174b94afd46Smickey;
175b94afd46Smickey	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
176b94afd46Smickey	sub		pu,op1,pu		;   to shift 4 signed
1778a472b3eSmickey
178b94afd46Smickey;
179b94afd46Smickey;  ----	bits = 10000 ---- shift 4 signed
180b94afd46Smickey;
181b94afd46Smickey	addib,tr	1,brindex,sh4s+4		; add 1 to index
182b94afd46Smickey	shd		pu,pl,4,pl		; shift 4 signed
183b94afd46Smickey;
184b94afd46Smickey;  ---- end of table ---------------------------------------------------------
185b94afd46Smickey;
1868a472b3eSmickeysh4s	shd		pu,pl,4,pl		; shift low word right 4 bits
187b94afd46Smickey	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
188b94afd46Smickey	shd		pm,pu,4,pu		; shift 4, minus signed
189b94afd46Smickey	addb,tr		op1,pu,lastadd		; do one more add, then finish
190b94afd46Smickey	addb,=,n	saveop2,gr0,fini	; check saveop2
191b94afd46Smickey;
192b94afd46Smickeysh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
193b94afd46Smickey	shd		pc,pu,4,pu		; shift 4 with overflow
194b94afd46Smickey	b		lastadd			; end of multiply
195b94afd46Smickey	addb,=,n	saveop2,gr0,fini	; check saveop2
196b94afd46Smickey;
197b94afd46Smickeysh3c	shd		pu,pl,3,pl		; shift product 3 bits
198b94afd46Smickey	shd		pc,pu,3,pu		; shift 3 signed
199b94afd46Smickey	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
2008a472b3eSmickey	shd		pu,pl,1,pl		;   shift low word right 1 bit
201b94afd46Smickey;
202b94afd46Smickeysh3us	extru		pu,28,29,pu		; shift 3 unsigned
203b94afd46Smickey	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
2048a472b3eSmickey	shd		pu,pl,1,pl		;   shift low word right 1 bit
205b94afd46Smickey;
206b94afd46Smickeysh3sa	extrs		pu,28,29,pu		; shift 3 signed
207b94afd46Smickey	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
2088a472b3eSmickey	shd		pu,pl,1,pl		;   shift low word right 1 bit
209b94afd46Smickey;
210b94afd46Smickeysh3s	shd		pu,pl,3,pl		; shift 3 minus signed
2118a472b3eSmickey	shd		pm,pu,3,pu		;   high word, ones shifted in from pm
212b94afd46Smickey	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
2138a472b3eSmickey	shd		pu,pl,1,pl		;   shift low word right 1 bit
214b94afd46Smickey;
215b94afd46Smickeysh1	addib,>		-1,cnt,mloop		; loop if count > 0
2168a472b3eSmickey	extru		pu,30,31,pu		;   shift high word right 1 unsigned
217b94afd46Smickey	b		lastadd			; end of multiply
218b94afd46Smickey	addb,=,n	saveop2,gr0,fini	; check saveop2
219b94afd46Smickey;
220b94afd46Smickeysh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
221b94afd46Smickey	extru		pu,29,30,pu		; shift unsigned
222b94afd46Smickey;
223b94afd46Smickeysh2s	shd		pu,pl,2,pl		; shift with minus sign
224b94afd46Smickey	shd		pm,pu,2,pu		;
225b94afd46Smickey	sub		pu,op1,pu		; subtract op1
226b94afd46Smickey	shd		pu,pl,2,pl		; shift with minus sign
227b94afd46Smickey	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
228b94afd46Smickey	shd		pm,pu,2,pu		; shift with minus sign
229b94afd46Smickey	addb,tr		op1,pu,lastadd		; do one more add, then finish
230b94afd46Smickey	addb,=,n	saveop2,gr0,fini	; check saveop2
231b94afd46Smickey;
232b94afd46Smickeysh2sb	extrs		pu,29,30,pu		; shift 2 signed
233b94afd46Smickey	sub		pu,op1,pu		; subtract op1 from product
234b94afd46Smickey	shd		pu,pl,2,pl		; shift with minus sign
235b94afd46Smickey	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
236b94afd46Smickey	shd		pm,pu,2,pu		; shift with minus sign
237b94afd46Smickey	addb,tr		op1,pu,lastadd		; do one more add, then finish
238b94afd46Smickey	addb,=,n	saveop2,gr0,fini	; check saveop2
239b94afd46Smickey;
240b94afd46Smickeysh1sa	extrs		pu,30,31,pu		;   signed
241b94afd46Smickey	sub		pu,op1,pu		; subtract op1 from product
242b94afd46Smickey	shd		pu,pl,3,pl		; shift 3 with minus sign
243b94afd46Smickey	addib,>		-1,cnt,mloop		; decrement count, loop if >0
2448a472b3eSmickey	shd		pm,pu,3,pu		; shift 3 with minus sign
245b94afd46Smickey	addb,tr		op1,pu,lastadd		; do one more add, then finish
246b94afd46Smickey	addb,=,n	saveop2,gr0,fini	; check saveop2
247b94afd46Smickey;
248b94afd46Smickeyfini0	movib,tr	0,pl,fini		; product = 0 as op1 = 0
249b94afd46Smickey	stws		pu,0(arg2)		; save high part of result
250b94afd46Smickey;
251b94afd46Smickeysh2us	extru		pu,29,30,pu		; shift 2 unsigned
252b94afd46Smickey	addb,tr		op1,pu,sh2a		; add op1
253b94afd46Smickey	shd		pu,pl,2,pl		; shift 2 bits
254b94afd46Smickey;
2558a472b3eSmickeysh2c	shd		pu,pl,2,pl		; shift low word right 2 bits
256b94afd46Smickey	shd		pc,pu,2,pu		; shift with carry
257b94afd46Smickey	addb,tr		op1,pu,sh2a		; add op1 to product
258b94afd46Smickey	shd		pu,pl,2,pl		; br. to sh2a to shift pu
259b94afd46Smickey;
260b94afd46Smickeysh2sa	extrs		pu,29,30,pu		; shift with sign
261b94afd46Smickey	addb,tr		op1,pu,sh2a		; add op1 to product
262b94afd46Smickey	shd		pu,pl,2,pl		; br. to sh2a to shift pu
263b94afd46Smickey;
264b94afd46Smickeysh2a	addib,>		-1,cnt,mloop		; loop if count > 0
2658a472b3eSmickey	extru		pu,29,30,pu		;   shift high word right 2 unsigned
266b94afd46Smickey;
267b94afd46Smickeymulend	addb,=,n	saveop2,gr0,fini	; check saveop2
268b94afd46Smickeylastadd	shd		saveop2,gr0,1,temp	;  if saveop2 <> 0, shift it
269b94afd46Smickey	shd		gr0,saveop2,1,saveop2	;  left 31 and add to result
2708a472b3eSmickey	add		pl,temp,pl		;  add low words
2718a472b3eSmickey	addc		pu,saveop2,pu		;  add high words plus carry
272b94afd46Smickey;
273b94afd46Smickey;	finish
274b94afd46Smickey;
275b94afd46Smickeyfini	stws		pu,0(arg2)		; save high part of result
276b94afd46Smickey	stws		pl,4(arg2)		; save low part of result
2778a472b3eSmickey
278b94afd46Smickey	ldws,mb		-4(sp),pm		; restore registers
279b94afd46Smickey	ldws,mb		-4(sp),pc		; restore registers
280b94afd46Smickey	ldws,mb		-4(sp),saveop2		; restore registers
281b94afd46Smickey	ldws,mb		-4(sp),brindex		; restore registers
282b94afd46Smickey	ldws,mb		-4(sp),cnt		; restore registers
283b94afd46Smickey	ldws,mb		-4(sp),op1		; restore registers
284b94afd46Smickey	ldws,mb		-4(sp),pl		; restore registers
285b94afd46Smickey	bv		0(rp)			; return
286b94afd46Smickey	ldws,mb		-4(sp),pu		; restore registers
2877eec34daSmickeyEXIT(u_xmpy)
2884f23d96fSmickey
2898a472b3eSmickey	.end
290