/*	$NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $	*/

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Rin Okuyama.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: muldi3.S,v 1.2 2020/05/31 12:37:07 rin Exp $")

| int64_t __muldi3(int64_t X, int64_t Y);
|
| * Return the lower 64bit of (X * Y) in %d0:%d1.
|
| * Intended for 68060:
|   - GCC does not emit __muldi3() for 68020-40, which have 32 * 32 --> 64 mulul.
|   - mulsl (and moveml) are not implemented for 68010.
|
| * Notation:
|   - H32:L32 --> higher:lower 32bit of a variable
|   - H:L     --> higher:lower 16bit of a variable/register
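|
| * For reference, a C sketch of the whole routine (illustration only;
|   the function name is hypothetical, not part of this file). The lower
|   64bit of a product is the same for signed and unsigned operands, so
|   the sketch uses unsigned arithmetic throughout:
|
|	#include <stdint.h>
|
|	uint64_t
|	muldi3_sketch(uint64_t x, uint64_t y)
|	{
|		uint32_t xl = (uint32_t)x, yl = (uint32_t)y;
|		uint32_t xh = (uint32_t)(x >> 32), yh = (uint32_t)(y >> 32);
|		/* 16 * 16 --> 32 partial products of X_L32 * Y_L32 */
|		uint64_t A = (uint64_t)(xl & 0xffff) * (yl & 0xffff);
|		uint64_t B = (uint64_t)(xl >> 16) * (yl & 0xffff);
|		uint64_t C = (uint64_t)(xl >> 16) * (yl >> 16);
|		uint64_t D = (uint64_t)(xl & 0xffff) * (yl >> 16);
|		uint64_t lo = (C << 32) + ((B + D) << 16) + A;
|		/* cross terms affect only the higher 32bit (mod 2^64) */
|		return lo + ((uint64_t)(xl * yh + xh * yl) << 32);
|	}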

#ifdef __mc68010__
#error "not for 68010"
#endif

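| Offsets of the arguments from %sp, valid once the moveml in __muldi3
| below has pushed %d2-%d4: return address + 3 saved registers =
| 4 longwords before X_H32.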
#define X_H32 (4 * 4)
#define X_L32 (X_H32 + 4)
#define Y_H32 (X_L32 + 4)
#define Y_L32 (Y_H32 + 4)

ENTRY(__muldi3)
	moveml	%d2-%d4, -(%sp)	| push %d2-%d4

| First, calculate (X_L32 * Y_L32) as a 64bit integer.
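|
| Writing X_L32 = (x_h << 16) + x_l and Y_L32 = (y_h << 16) + y_l,
| the identity behind the muluw sequence below is:
|
|   X_L32 * Y_L32 = (C << 32) + ((B + D) << 16) + A
|
| with A = x_l * y_l, B = x_h * y_l, C = x_h * y_h, D = x_l * y_h,
| each a 16 * 16 --> 32 muluw product.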

	movel	X_L32(%sp), %a0	| save X_L32
	movel	Y_L32(%sp), %a1	| save Y_L32

	movel	%a0, %d2	| prepare for X_L32(H) in L
	movel	%a1, %d3	| prepare for Y_L32(H) in L

	movel	%a0, %d4	| X_L32(L) in L
	movel	%a1, %d1	| Y_L32(L) in L
	movel	%a0, %d0	| X_L32(L) in L

	swap	%d2		| X_L32(H) in L
	swap	%d3		| Y_L32(H) in L

	muluw	%d1, %d4	| A = X_L32(L) * Y_L32(L)
	muluw	%d2, %d1	| B = X_L32(H) * Y_L32(L)
	muluw	%d3, %d2	| C = X_L32(H) * Y_L32(H)
	muluw	%d0, %d3	| D = X_L32(L) * Y_L32(H)

	movel	%d4, %d0	| extract A(H)
	clrw	%d0
	swap	%d0

	addl	%d0, %d1	| B += A(H) (no carry; max 0xffff0000)

	addl	%d3, %d1	| B += D
	bccs	1f		| if (carry)
	addil	#0x10000, %d2	| 	C += 0x10000
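|				| (a carry out of B + D is worth 2^48,
|				|  i.e. 0x10000 at C's 2^32 position)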

1:	swap	%d1		| B(H) <--> B(L)

| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = A

	clrl	%d3		| extract B(H)
	movew	%d1, %d3

	movew	%d4, %d1	| %d1 = (B(L) << 16) + A(L)

	addl	%d3, %d2	| C += B(H)

| We have (X_L32 * Y_L32) in %d2:%d1; the lower 32bit is complete.
| Add (X_L32 * Y_H32 + X_H32 * Y_L32) to the higher 32bit.
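|
| This is the product reduced mod 2^64:
|
|   X * Y = X_L32 * Y_L32
|         + ((X_L32 * Y_H32 + X_H32 * Y_L32) << 32)	(mod 2^64)
|
| Only the low 32bit of each cross product survives, so mulsl (whose
| low 32bit result is the same for signed and unsigned operands) is
| enough, and the X_H32 * Y_H32 term vanishes entirely.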
|
| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = free

	movel	%a0, %d0	| restore X_L32
	movel	%a1, %d3	| restore Y_L32
	mulsl	Y_H32(%sp), %d0	| E = X_L32 * Y_H32
	mulsl	X_H32(%sp), %d3	| F = X_H32 * Y_L32
	addl	%d2, %d0	| E += C
	addl	%d3, %d0	| %d0 = E + F

	moveml	(%sp)+, %d2-%d4	| pop %d2-%d4
	rts
END(__muldi3)