/*	$OpenBSD: impyu.S,v 1.12 2011/04/16 20:52:12 deraadt Exp $	*/
/*
  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  To anyone who acknowledges that this file is provided "AS IS"
  without any express or implied warranty:
      permission to use, copy, modify, and distribute this file
  for any purpose is hereby granted without fee, provided that
  the above copyright notice and this notice appears in all
  copies, and that the name of Hewlett-Packard Company not be
  used in advertising or publicity pertaining to distribution
  of the software without specific, written prior permission.
  Hewlett-Packard Company makes no representations about the
  suitability of this software for any purpose.
*/
/* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */

#include <machine/asm.h>
#include <machine/frame.h>

;****************************************************************************
;
;Implement an integer multiply routine for 32-bit operands and 64-bit product
; with operand values of zero (multiplicand only) and 2**31 treated specially.
; The algorithm uses the multiplier, four bits at a time, from right to left,
; to generate partial product.  Execution speed is more important than program
; size in this implementation.
;
; Interface (as used by the code below):
;	arg0	address of the 32-bit multiplicand word
;	arg1	address of the 32-bit multiplier word
;	arg2	address of the 64-bit product: high word is stored at
;		offset 0, low word at offset 4
;
; %r3 through %r10 are pushed on the stack on entry and popped in the
; reverse order before the routine returns through rp.
;
; The multiplier is consumed in 8 steps of 4 bits.  Each step dispatches
; through a branch table (mtable) whose entries are exactly two
; instructions long, so the table entries must not be reordered, resized
; or separated from mloop.  The second instruction of each pair sits in
; the delay slot of the first.
;
;******************************************************************************
;
; Definitions - General registers
;
; NOTE: pl and op2 are two names for %r4, and cnt and temp are two names
; for %r6.  temp is only written at lastadd, after the loop count is no
; longer needed.
;
gr0	.reg	%r0		; General register zero
pu	.reg	%r3		; upper part of product
pl	.reg	%r4		; lower part of product
op2	.reg	%r4		; multiplier
op1	.reg	%r5		; multiplicand
cnt	.reg	%r6		; count in multiply
brindex	.reg	%r7		; index into the br. table
saveop2	.reg	%r8		; save op2 if high bit of multiplicand
				; is set
pc	.reg	%r9		; carry bit of product, = 00...01
pm	.reg	%r10		; value of -1 used in shifting
temp	.reg	%r6

;****************************************************************************
	.text
LEAF_ENTRY(u_xmpy)
	stws,ma		pu,4(sp)		; save registers on stack
	stws,ma		pl,4(sp)		; save registers on stack
	stws,ma		op1,4(sp)		; save registers on stack
	stws,ma		cnt,4(sp)		; save registers on stack
	stws,ma		brindex,4(sp)		; save registers on stack
	stws,ma		saveop2,4(sp)		; save registers on stack
	stws,ma		pc,4(sp)		; save registers on stack
	stws,ma		pm,4(sp)		; save registers on stack
;
;   Start multiply process
;
	ldws		0(arg0),op1		; get multiplicand
	ldws		0(arg1),op2		; get multiplier
	addib,=		0,op1,fini0		; op1 = 0, product = 0
	addi		0,gr0,pu		; clear product
	bb,>=		op1,0,mpy1		; test msb of multiplicand
	addi		0,gr0,saveop2		; clear saveop2
;
; msb of multiplicand is set so will save multiplier for a final
; addition into the result
;
; The extru below keeps only the low 31 bits of op1.  When those bits are
; all zero (op1 was 2**31) the following branch is nullified and the
; product is formed directly as op2 shifted left 31 bits.
;
	extru,=		op1,31,31,op1		; clear msb of multiplicand
	b		mpy1			; if op1 < 2**32, start multiply
	add		op2,gr0,saveop2		; save op2 in saveop2
	shd		gr0,op2,1,pu		; shift op2 left 31 for result
	b		fini			; go to finish
	shd		op2,gr0,1,pl
;
mpy1	addi		-1,gr0,pm		; initialize pm to 111...1
	addi		1,gr0,pc		; initialize pc to 00...01
	movib,tr	8,cnt,mloop		; set count for mpy loop
	extru		op2,31,4,brindex	; 4 bits as index into table
;
	.align		8
;
; sh4n-4, sh4n and sh4n+4 are all used as branch targets by the table.
;
	b		sh4c			; br. if sign overflow
sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
	extru		pu,27,28,pu		;   <= zero
;
mloop	blr		brindex,gr0		; br. into table
						;   entries of 2 words
	extru		op2,27,4,brindex	; next 4 bits into index
;
;
; branch table for the multiplication process with four multiplier bits
;
mtable						; two words per entry
;
; ---- bits = 0000 ---- shift product 4 bits -------------------------------
;
	b		sh4n+4			; just shift partial
	shd		pu,pl,4,pl		; product right 4 bits
;
; ---- bits = 0001 ---- add op1, then shift 4 bits
;
	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
	shd		pu,pl,4,pl		; product right 4 bits
;
; ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
;
	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
	addb,uv		op1,pu,sh4c		; product right 4 bits
;
; ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
;
	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
	sh1add,nuv	op1,pu,pu		; product right 4 bits
;
; ---- bits = 0100 ---- shift 2, add op1, shift 2
;
	b		sh2sa
	shd		pu,pl,2,pl		; shift product 2 bits
;
; ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2us		; add op1 to product
	shd		pu,pl,2,pl		; shift 2 bits
;
; ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
	addb,nuv	op1,pu,sh2us		; br. if not overflow
;
; ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
;
	b		sh3s
	sub		pu,op1,pu		; subtract op1, br. to sh3s

;
; ---- bits = 1000 ---- shift 3, add op1, shift 1
;
	b		sh3sa
	shd		pu,pl,3,pl		; shift product right 3 bits
;
; ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
	shd		pu,pl,3,pl		;   and shift 1
;
; ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
	addb,nuv	op1,pu,sh3us		; br. if no overflow
;
; ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
;
	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
	sub		pu,op1,pu		;   shift 2 with minus sign
;
; ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
;
	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
	shd		pu,pl,2,pl		; shift right 2 bits signed
;
; ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
;
	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
;
; ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
;
	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
	shd		pu,pl,1,pl		; shift 1 bit
;
; ---- bits = 1111 ---- add -op1, shift 4 signed
;
	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
	sub		pu,op1,pu		;   to shift 4 signed

;
; ---- bits = 10000 ---- shift 4 signed
;
; This seventeenth entry is reached when an earlier step incremented
; brindex and the next four multiplier bits were already 1111.
;
	addib,tr	1,brindex,sh4s+4	; add 1 to index
	shd		pu,pl,4,pl		; shift 4 signed
;
; ---- end of table ---------------------------------------------------------
;
sh4s	shd		pu,pl,4,pl
	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pm,pu,4,pu		; shift 4, minus signed
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pc,pu,4,pu		; shift 4 with overflow
	b		lastadd			; end of multiply
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh3c	shd		pu,pl,3,pl		; shift product 3 bits
	shd		pc,pu,3,pu		; shift 3 signed
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3us	extru		pu,28,29,pu		; shift 3 unsigned
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3sa	extrs		pu,28,29,pu		; shift 3 signed
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
	shd		pm,pu,3,pu
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh1	addib,>		-1,cnt,mloop		; loop if count > 0
	extru		pu,30,31,pu
	b		lastadd			; end of multiply
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
	extru		pu,29,30,pu		; shift unsigned
;
sh2s	shd		pu,pl,2,pl		; shift with minus sign
	shd		pm,pu,2,pu		;
	sub		pu,op1,pu		; subtract op1
	shd		pu,pl,2,pl		; shift with minus sign
	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pm,pu,2,pu		; shift with minus sign
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh2sb	extrs		pu,29,30,pu		; shift 2 signed
	sub		pu,op1,pu		; subtract op1 from product
	shd		pu,pl,2,pl		; shift with minus sign
	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pm,pu,2,pu		; shift with minus sign
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh1sa	extrs		pu,30,31,pu		;   signed
	sub		pu,op1,pu		; subtract op1 from product
	shd		pu,pl,3,pl		; shift 3 with minus sign
	addib,>		-1,cnt,mloop		; decrement count, loop if >0
	shd		pm,pu,3,pu
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
; op1 was zero: pu has already been cleared, so clear pl and finish.
;
fini0	movib,tr	0,pl,fini		; product = 0 as op1 = 0
	stws		pu,0(arg2)		; save high part of result
;
sh2us	extru		pu,29,30,pu		; shift 2 unsigned
	addb,tr		op1,pu,sh2a		; add op1
	shd		pu,pl,2,pl		; shift 2 bits
;
sh2c	shd		pu,pl,2,pl
	shd		pc,pu,2,pu		; shift with carry
	addb,tr		op1,pu,sh2a		; add op1 to product
	shd		pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2sa	extrs		pu,29,30,pu		; shift with sign
	addb,tr		op1,pu,sh2a		; add op1 to product
	shd		pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
	extru		pu,29,30,pu
;
; If the msb of the multiplicand was set, saveop2 holds the multiplier;
; add it, shifted left 31 bits, into the 64-bit product.
;
mulend	addb,=,n	saveop2,gr0,fini	; check saveop2
lastadd	shd		saveop2,gr0,1,temp	; if saveop2 <> 0, shift it
	shd		gr0,saveop2,1,saveop2	;   left 31 and add to result
	add		pl,temp,pl
	addc		pu,saveop2,pu
;
; finish
;
fini	stws		pu,0(arg2)		; save high part of result
	stws		pl,4(arg2)		; save low part of result

	ldws,mb		-4(sp),pm		; restore registers
	ldws,mb		-4(sp),pc		; restore registers
	ldws,mb		-4(sp),saveop2		; restore registers
	ldws,mb		-4(sp),brindex		; restore registers
	ldws,mb		-4(sp),cnt		; restore registers
	ldws,mb		-4(sp),op1		; restore registers
	ldws,mb		-4(sp),pl		; restore registers
	bv		0(rp)			; return
	ldws,mb		-4(sp),pu		; restore registers
EXIT(u_xmpy)

	.end