/*	$NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $	*/

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Rin Okuyama.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
303a564f24Srin */ 313a564f24Srin 323a564f24Srin#include <machine/asm.h> 333a564f24Srin 34*97690669SrinRCSID("$NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $") 353a564f24Srin 363a564f24Srin| int64_t __muldi3(int64_t X, int64_t Y); 373a564f24Srin| 383a564f24Srin| * Return lower 64bit of (X * Y) into %d0:%d1. 393a564f24Srin| 403a564f24Srin| * Intended for 68060: 413a564f24Srin| - GCC does not emit __muldi3() for 68020-40, that have 32 * 32 --> 64 mulul. 423a564f24Srin| - mulsl (and moveml) are not implemented for 68010. 433a564f24Srin| 443a564f24Srin| * Notation: 453a564f24Srin| - H32:L32 --> higher:lower 32bit of variable 463a564f24Srin| - H:L --> higher:lower 16bit of variable/register 473a564f24Srin 483a564f24Srin#ifdef __mc68010__ 493a564f24Srin#error "not for 68010" 503a564f24Srin#endif 513a564f24Srin 523a564f24Srin#define X_H32 (4 * 4) 533a564f24Srin#define X_L32 (X_H32 + 4) 543a564f24Srin#define Y_H32 (X_L32 + 4) 553a564f24Srin#define Y_L32 (Y_H32 + 4) 563a564f24Srin 573a564f24SrinENTRY(__muldi3) 583a564f24Srin moveml %d2-%d4, -(%sp) | push %d2-%d4 593a564f24Srin 603a564f24Srin| First, calculate (X_L32 * Y_L32) as a 64bit integer. 
613a564f24Srin 623a564f24Srin movel X_L32(%sp), %a0 | save X_L32 633a564f24Srin movel Y_L32(%sp), %a1 | save Y_L32 643a564f24Srin 653a564f24Srin movel %a0, %d2 | prepare for X_L32(H) in L 663a564f24Srin movel %a1, %d3 | prepare for Y_L32(H) in L 673a564f24Srin 683a564f24Srin movel %a0, %d4 | X_L32(L) in L 693a564f24Srin movel %a1, %d1 | Y_L32(L) in L 703a564f24Srin movel %a0, %d0 | X_L32(L) in L 713a564f24Srin 723a564f24Srin swap %d2 | X_L32(H) in L 733a564f24Srin swap %d3 | Y_L32(H) in L 743a564f24Srin 753a564f24Srin muluw %d1, %d4 | A = X_L32(L) * Y_L32(L) 763a564f24Srin muluw %d2, %d1 | B = X_L32(H) * Y_L32(L) 773a564f24Srin muluw %d3, %d2 | C = X_L32(H) * Y_L32(H) 783a564f24Srin muluw %d0, %d3 | D = X_L32(L) * Y_L32(H) 793a564f24Srin 803a564f24Srin movel %d4, %d0 | extract A(H) 813a564f24Srin clrw %d0 823a564f24Srin swap %d0 833a564f24Srin 843a564f24Srin addl %d0, %d1 | B += A(H) (no carry; max 0xffff0000) 853a564f24Srin 863a564f24Srin addl %d3, %d1 | B += D 873a564f24Srin bccs 1f | if (carry) 883a564f24Srin addil #0x10000, %d2 | C += 0x10000 893a564f24Srin 903a564f24Srin1: swap %d1 | B(H) <--> B(L) 913a564f24Srin 923a564f24Srin| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = A 933a564f24Srin 943a564f24Srin clrl %d3 | extract B(H) 953a564f24Srin movew %d1, %d3 963a564f24Srin 973a564f24Srin movew %d4, %d1 | %d1 = (B(L) << 16) + A(L) 983a564f24Srin 993a564f24Srin addl %d3, %d2 | C += B(H) 1003a564f24Srin 1013a564f24Srin| We have (X_L32 * Y_L32) in %d2:%d1. Lower 32bit was completed. 1023a564f24Srin| Add (X_L32 * Y_H32 + X_H32 * Y_L32) to higher 32bit. 
1033a564f24Srin| 1043a564f24Srin| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = free 1053a564f24Srin 1063a564f24Srin movel %a0, %d0 | restore X_L32 1073a564f24Srin movel %a1, %d3 | restore Y_L32 1083a564f24Srin mulsl Y_H32(%sp), %d0 | E = X_L32 * Y_H32 1093a564f24Srin mulsl X_H32(%sp), %d3 | F = X_H32 * Y_L32 1103a564f24Srin addl %d2, %d0 | E += C 1113a564f24Srin addl %d3, %d0 | %d0 = E + F 1123a564f24Srin 113*97690669Srin moveml (%sp)+, %d2-%d4 | pop %d2-%d4 1143a564f24Srin rts 1153a564f24SrinEND(__muldi3) 116