/*	$NetBSD: mul.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp
 */

#include <machine/asm.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	.asciz "@(#)mul.s	8.1 (Berkeley) 6/4/93"
#else
	RCSID("$NetBSD: mul.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 * Signed multiply, from Appendix E of the Sparc Version 8
 * Architecture Manual.
 *
 * Contract (compiler-support millicode, SPARC ABI):
 *   In:   %o0 = multiplier, %o1 = multiplicand (both signed 32-bit)
 *   Out:  %o1:%o0 = 64-bit signed product
 *         (%o0 = low 32 bits, %o1 = high 32 bits)
 *   Uses: %o4, %o5, %y, condition codes
 *
 * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
 * the 64-bit product).
 *
 * This code optimizes short (less than 13-bit) multiplies: if the
 * multiplier fits in 12 bits, only 12 mulscc steps are needed instead
 * of 32.
 *
 * NOTE(review): every branch and retl below has a delay-slot
 * instruction that executes before the transfer takes effect; the
 * instructions written one extra space in are delay-slot fills.
 */

FUNC(.mul)
	mov	%o0, %y		! multiplier -> Y
	andncc	%o0, 0xfff, %g0	! test bits 12..31
	be	Lmul_shortway	! if zero, can do it the short way
	 andcc	%g0, %g0, %o4	! (delay slot, runs either way) zero the
				! partial product and clear N and V

	/*
	 * Long multiply.  32 steps, followed by a final shift step.
	 * Each mulscc consumes one bit of %y (the multiplier) and
	 * conditionally adds %o1 into the partial product in %o4,
	 * shifting the double-length result right one bit per step.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %o1, %o4	! 13
	mulscc	%o4, %o1, %o4	! 14
	mulscc	%o4, %o1, %o4	! 15
	mulscc	%o4, %o1, %o4	! 16
	mulscc	%o4, %o1, %o4	! 17
	mulscc	%o4, %o1, %o4	! 18
	mulscc	%o4, %o1, %o4	! 19
	mulscc	%o4, %o1, %o4	! 20
	mulscc	%o4, %o1, %o4	! 21
	mulscc	%o4, %o1, %o4	! 22
	mulscc	%o4, %o1, %o4	! 23
	mulscc	%o4, %o1, %o4	! 24
	mulscc	%o4, %o1, %o4	! 25
	mulscc	%o4, %o1, %o4	! 26
	mulscc	%o4, %o1, %o4	! 27
	mulscc	%o4, %o1, %o4	! 28
	mulscc	%o4, %o1, %o4	! 29
	mulscc	%o4, %o1, %o4	! 30
	mulscc	%o4, %o1, %o4	! 31
	mulscc	%o4, %o1, %o4	! 32
	mulscc	%o4, %g0, %o4	! final shift (aligns the result; adds 0)

	! If %o0 was negative, the result is
	!	(%o0 * %o1) + (%o1 << 32))
	! (mulscc treats the multiplier as unsigned, so a negative %o0
	! leaves an extra %o1 in the high word).  We fix that here.

	tst	%o0
	bge	1f
	 rd	%y, %o0		! (delay slot) low 32 bits of product -> %o0

	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
	! %o1 (i.e., return %o4 - %o1 in %o1).
	retl
	 sub	%o4, %o1, %o1	! (delay slot) corrected high word

1:
	retl
	 mov	%o4, %o1	! (delay slot) high word as computed

Lmul_shortway:
	/*
	 * Short multiply.  12 steps, followed by a final shift step.
	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
	 * but there is no problem with %o0 being negative (unlike above):
	 * only 12 multiplier bits exist, so no sign-fixup is needed.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %g0, %o4	! final shift

	/*
	 * %o4 has 20 of the bits that should be in the low part of the
	 * result; %y has the bottom 12 (as %y's top 12).  That is:
	 *
	 *	  %o4		  %y
	 * +----------------+----------------+
	 * | -12- |   -20-  | -12- |   -20-  |
	 * +------(---------+------)---------+
	 *	   --hi-- ----low-part----
	 *
	 * The upper 12 bits of %o4 should be sign-extended to form the
	 * high part of the product (i.e., highpart = %o4 >> 20).
	 */

	rd	%y, %o5
	sll	%o4, 12, %o0	! shift middle bits left 12
	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
	or	%o5, %o0, %o0	! construct low part of result
	retl
	 sra	%o4, 20, %o1	! (delay slot) ... and extract high part
				! of result (arithmetic shift sign-extends)