dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/pa64/umul.asm (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  Optimizations (not yet done):
dnl  * Avoid skip instructions
dnl  * Put carry-generating and carry-consuming insns consecutively
dnl  * Don't allocate any stack; the "home" positions for parameters could be
dnl    used instead.

include(`../config.m4')

dnl  Register roles used by mpn_umul_ppmm_r:
dnl    p0   (%r28)  low*low partial product, then the low 64 bits of the result
dnl    p1   (%r29)  high*high partial product, then the high 64 bits of the result
dnl    t32  (%r19)  the constant 2^32
dnl    t0   (%r20)  low 32 bits of m0+m1, moved up into the upper word
dnl    t1   (%r21)  high 32 bits of m0+m1, right justified
dnl    x    (%r22)  the 64-bit sum m0+m1
dnl    m0   (%r23)  first middle (cross) partial product
dnl    m1   (%r24)  second middle (cross) partial product
dnl  Note that m0 and m1 share registers with incoming arguments; the
dnl  arguments are saved before m0 and m1 are loaded.
define(`p0',`%r28')
define(`p1',`%r29')
define(`t32',`%r19')
define(`t0',`%r20')
define(`t1',`%r21')
define(`x',`%r22')
define(`m0',`%r23')
define(`m1',`%r24')

dnl  Assemble at level 2.0w when HAVE_ABI_2_0w is defined, else at level 2.0.
ifdef(`HAVE_ABI_2_0w',
`	.level	2.0w
',`	.level	2.0
')
C mpn_umul_ppmm_r -- unsigned 64x64 -> 128 bit multiply.
C
C Inputs:  two 64-bit multiplicands and a pointer for the low product half.
C   HAVE_ABI_2_0w:  multiplicands in %r26 and %r25, pointer in %r24.
C   otherwise:      multiplicands arrive as 32-bit halves in %r25:%r26 and
C                   %r23:%r24, and the pointer is loaded from the stack.
C Outputs: the low 64 bits are stored through the pointer; the high 64 bits
C          are returned in %r28 (HAVE_ABI_2_0w) or split over %r28,%r29.
C
C Method:  the four 32x32->64 partial products are formed with xmpyu in the
C          floating-point registers, moved to general registers through a
C          128-byte stack scratch area, and summed with explicit carry
C          handling.
PROLOGUE(mpn_umul_ppmm_r)
	ldo		128(%r30),%r30		C allocate 128 bytes of scratch
ifdef(`HAVE_ABI_2_0w',
`	std		%r26,-64(%r30)		C spill first multiplicand
	std		%r25,-56(%r30)		C spill second multiplicand
	copy		%r24,%r31		C save result pointer, %r24 becomes m1
',`
	depd		%r25,31,32,%r26		C %r26 = lo32(%r25):lo32(%r26)
	std		%r26,-64(%r30)		C spill first multiplicand
	depd		%r23,31,32,%r24		C %r24 = lo32(%r23):lo32(%r24)
	std		%r24,-56(%r30)		C spill second multiplicand
	ldw		-180(%r30),%r31		C result pointer, read from the stack
')

	fldd		-64(%r30),%fr4		C %fr4 = first multiplicand
	fldd		-56(%r30),%fr5		C %fr5 = second multiplicand

	C Form the four partial products (L = upper word, R = lower word) and
	C spill each one so it can be reloaded into a general register.
	xmpyu		%fr5R,%fr4R,%fr6	C low  * low
	fstd		%fr6,-128(%r30)
	xmpyu		%fr5R,%fr4L,%fr7	C low  * high
	fstd		%fr7,-120(%r30)
	xmpyu		%fr5L,%fr4R,%fr8	C high * low
	fstd		%fr8,-112(%r30)
	xmpyu		%fr5L,%fr4L,%fr9	C high * high
	fstd		%fr9,-104(%r30)

	depdi,z		1,31,1,t32		C t32 = 2^32

	ldd		-128(%r30),p0		C lo = low 64 bit of product
	ldd		-120(%r30),m0		C m0 = mid0 64 bit of product
	ldd		-112(%r30),m1		C m1 = mid1 64 bit of product
	ldd		-104(%r30),p1		C hi = high 64 bit of product

	C The next insn is nullified unless m0+m1 overflowed 64 bits.  Such a
	C carry has weight 2^32 relative to p1, hence the addition of t32.
	add,l,*nuv	m0,m1,x			C x = m1+m0
	 add,l		t32,p1,p1		C propagate carry to mid of p1
	depd,z		x,31,32,t0		C lo32(m1+m0)
	add		t0,p0,p0		C add into low half, sets carry
	extrd,u		x,31,32,t1		C hi32(m1+m0)
	add,dc		t1,p1,p1		C add into high half plus carry

	std		p0,0(%r31)		C store low half of product
ifdef(`HAVE_ABI_2_0w',
`	copy		p1,%r28			C return val in %r28
',`	extrd,u		p1,31,32,%r28		C return val in %r28,%r29
')
	bve		(%r2)			C return to caller
	ldo		-128(%r30),%r30		C delay slot: release scratch area
EPILOGUE(mpn_umul_ppmm_r)