/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * %sccs.include.redist.c%
 *
 * from: $Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp $
 */

#if defined(LIBC_SCCS) && !defined(lint)
	.asciz "@(#)mul.s	8.1 (Berkeley) 06/04/93"
#endif /* LIBC_SCCS and not lint */

/*
 * Signed multiply, from Appendix E of the Sparc Version 8
 * Architecture Manual.
 *
 * In:   %o0 = multiplier, %o1 = multiplicand (both signed 32-bit).
 * Out:  returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
 *       the 64-bit product, %o0 the lower 32 bits).
 * Uses: %o4 as the partial-product accumulator, %o5 as scratch
 *       (short path only), and the %y register (loaded with the
 *       multiplier, consumed one bit per mulscc step).
 *
 * This code optimizes short (less than 13-bit) multiplies: if the
 * multiplier fits in 12 bits, only 12 mulscc steps are needed instead
 * of 32, at the cost of a fix-up shift/merge at the end.
 */

#include "DEFS.h"
FUNC(.mul)
	mov	%o0, %y		! multiplier -> Y
	andncc	%o0, 0xfff, %g0	! test bits 12..31 of the multiplier
	be	Lmul_shortway	! if zero, can do it the short way
	andcc	%g0, %g0, %o4	! (delay slot) zero the partial product
				! and clear N and V for the first mulscc

	/*
	 * Long multiply.  32 mulscc steps, followed by a final shift step.
	 * Each mulscc examines one bit of %y (the multiplier), conditionally
	 * adds %o1, and shifts the accumulator/%y pair right one bit.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %o1, %o4	! 13
	mulscc	%o4, %o1, %o4	! 14
	mulscc	%o4, %o1, %o4	! 15
	mulscc	%o4, %o1, %o4	! 16
	mulscc	%o4, %o1, %o4	! 17
	mulscc	%o4, %o1, %o4	! 18
	mulscc	%o4, %o1, %o4	! 19
	mulscc	%o4, %o1, %o4	! 20
	mulscc	%o4, %o1, %o4	! 21
	mulscc	%o4, %o1, %o4	! 22
	mulscc	%o4, %o1, %o4	! 23
	mulscc	%o4, %o1, %o4	! 24
	mulscc	%o4, %o1, %o4	! 25
	mulscc	%o4, %o1, %o4	! 26
	mulscc	%o4, %o1, %o4	! 27
	mulscc	%o4, %o1, %o4	! 28
	mulscc	%o4, %o1, %o4	! 29
	mulscc	%o4, %o1, %o4	! 30
	mulscc	%o4, %o1, %o4	! 31
	mulscc	%o4, %o1, %o4	! 32
	mulscc	%o4, %g0, %o4	! final shift (aligns the last partial sum)

	! mulscc treats %y (the multiplier, originally %o0) as unsigned.
	! If %o0 was negative, the 64-bit result computed so far is
	!	(%o0 * %o1) + (%o1 << 32)
	! We fix that here by subtracting %o1 from the high word.

	tst	%o0
	bge	1f
	rd	%y, %o0		! (delay slot) low 32 bits of product -> %o0

	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
	! %o1 (i.e., return %o4 - %o1 in %o1).
	retl
	sub	%o4, %o1, %o1	! (delay slot) high word, sign-corrected

1:
	retl
	mov	%o4, %o1	! (delay slot) high word needs no correction

Lmul_shortway:
	/*
	 * Short multiply.  12 steps, followed by a final shift step.
	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
	 * but there is no problem with %o0 being negative (unlike above):
	 * a 12-bit multiplier's sign bit never reaches the top of %y.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %g0, %o4	! final shift

	/*
	 * %o4 has 20 of the bits that should be in the low part of the
	 * result; %y has the bottom 12 (as %y's top 12).  That is:
	 *
	 *	  %o4		  %y
	 * +----------------+----------------+
	 * | -12- |  -20-   | -12- |  -20-   |
	 * +------(---------+------)---------+
	 *  --hi-- ----low-part----
	 *
	 * The upper 12 bits of %o4 should be sign-extended to form the
	 * high part of the product (i.e., highpart = %o4 >> 20).
	 */

	rd	%y, %o5
	sll	%o4, 12, %o0	! shift middle bits left 12
	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
	or	%o5, %o0, %o0	! construct low part of result
	retl
	sra	%o4, 20, %o1	! (delay slot) extract high part of result