/*	$NetBSD: mul.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp
 */

#include <machine/asm.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	.asciz "@(#)mul.s	8.1 (Berkeley) 6/4/93"
#else
	RCSID("$NetBSD: mul.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 * Signed multiply, from Appendix E of the Sparc Version 8
 * Architecture Manual.
 *
 * In:	%o0 = multiplier, %o1 = multiplicand (both signed 32-bit)
 * Out:	%o0 = low 32 bits of the 64-bit product,
 *	%o1 = high 32 bits of the 64-bit product (i.e. result in %o1:%o0)
 * Clobbers: %o4, %o5, %y, condition codes.
 *
 * This code optimizes short (less than 13-bit) multiplies: if all of
 * bits 12..31 of the multiplier are zero, only 12 mulscc steps are
 * needed instead of 32.
 */

FUNC(.mul)
	mov	%o0, %y		! multiplier -> Y register (mulscc shifts it out)
	andncc	%o0, 0xfff, %g0	! test bits 12..31 of the multiplier
	be	Lmul_shortway	! if zero, can do it the short way
	 andcc	%g0, %g0, %o4	! (delay) zero partial product; clear N and V

	/*
	 * Long multiply.  32 mulscc steps, followed by a final shift step.
	 * Each mulscc shifts %o4:%y right one bit and conditionally adds
	 * %o1 into %o4, driven by the low bit of %y and the saved N^V.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %o1, %o4	! 13
	mulscc	%o4, %o1, %o4	! 14
	mulscc	%o4, %o1, %o4	! 15
	mulscc	%o4, %o1, %o4	! 16
	mulscc	%o4, %o1, %o4	! 17
	mulscc	%o4, %o1, %o4	! 18
	mulscc	%o4, %o1, %o4	! 19
	mulscc	%o4, %o1, %o4	! 20
	mulscc	%o4, %o1, %o4	! 21
	mulscc	%o4, %o1, %o4	! 22
	mulscc	%o4, %o1, %o4	! 23
	mulscc	%o4, %o1, %o4	! 24
	mulscc	%o4, %o1, %o4	! 25
	mulscc	%o4, %o1, %o4	! 26
	mulscc	%o4, %o1, %o4	! 27
	mulscc	%o4, %o1, %o4	! 28
	mulscc	%o4, %o1, %o4	! 29
	mulscc	%o4, %o1, %o4	! 30
	mulscc	%o4, %o1, %o4	! 31
	mulscc	%o4, %o1, %o4	! 32
	mulscc	%o4, %g0, %o4	! final shift

	! If %o0 was negative, the result so far is
	!	(%o0 * %o1) + (%o1 << 32)
	! because mulscc treats the multiplier as unsigned.
	! We fix that here by subtracting %o1 from the high word.

	tst	%o0
	bge	1f
	 rd	%y, %o0		! (delay) low 32 bits of product -> %o0

	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
	! %o1 (i.e., return %o4 - %o1 in %o1).
	retl
	 sub	%o4, %o1, %o1

1:
	retl
	 mov	%o4, %o1	! high 32 bits of product -> %o1

Lmul_shortway:
	/*
	 * Short multiply.  12 steps, followed by a final shift step.
	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
	 * but there is no problem with %o0 being negative (unlike above):
	 * the multiplier fits in 12 bits, so its sign bit is zero.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %g0, %o4	! final shift

	/*
	 * %o4 has 20 of the bits that should be in the low part of the
	 * result; %y has the bottom 12 (as %y's top 12).  That is:
	 *
	 *	  %o4		  %y
	 *	+----------------+----------------+
	 *	| -12- |  -20-	 | -12- |  -20-	  |
	 *	+------(---------+------)---------+
	 *	 --hi--	 ----low-part----
	 *
	 * The upper 12 bits of %o4 should be sign-extended to form the
	 * high part of the product (i.e., highpart = %o4 >> 20).
	 */

	rd	%y, %o5
	sll	%o4, 12, %o0	! shift middle bits left 12
	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
	or	%o5, %o0, %o0	! construct low part of result
	retl
	 sra	%o4, 20, %o1	! ... and extract high part of result