xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/s390_64/mul_basecase.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  S/390-64 mpn_mul_basecase.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C            cycles/limb
34C z900		 ?
35C z990		23
36C z9		 ?
37C z10		28
38C z196		 ?
39
40C TODO
41C  * Perhaps add special case for un <= 2.
42C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
43C    up by about 10%.
44
45C INPUT PARAMETERS
46define(`rp',	`%r2')		C result pointer; stepped one limb per outer pass
47define(`up',	`%r3')		C first operand pointer; only read, never modified
48define(`un',	`%r4')		C limb count of up; decremented once on entry
49define(`vp',	`%r5')		C second operand pointer; stepped one limb per outer pass
50define(`vn',	`%r6')		C limb count of vp; counts the outer passes
51
52define(`zero',	`%r8')		C set to 0 with lghi; addend for carry-only alcgr
53
54ASM_START()
C mpn_mul_basecase -- schoolbook multiplication of two limb vectors.
C
C Multiplies the un limbs at up by the vn limbs at vp and stores the product
C at rp.  The first pass (mul_1) writes rp[0..un].  Each of the remaining
C vn-1 passes (addmul_1) moves rp and vp forward one limb, adds up[] times
C the new vp[0] into rp[0..un-1] and stores the final carry limb at rp[un],
C so un+vn limbs are written in total.  (un here is the count on entry; the
C register itself is decremented once before the loops.)
C
C NOTE(review): the un < 2 shortcut treats the input as 1 x 1, which assumes
C un >= vn >= 1 -- confirm against the callers.
C
C Register use in the general path (%r6-%r12 are saved on entry and restored
C before returning):
C   %r0:%r1	product of the current up limb and %r7 (high:low)
C   %r7		current multiplier limb, vp[0]
C   %r9		inner loop counter, reloaded from un for every pass
C   %r10	carry limb: high part of the previous product
C   %r11	low part of the first product, then the rp limb being added to
C   %r12	byte offset into up and rp
C The carry between limbs is kept in the condition code from one alcgr/algr
C to the next, including across the loop branch, so no instruction that
C changes the condition code may be inserted between them.
55PROLOGUE(mpn_mul_basecase)
56	cghi	un, 2			C un >= 2 takes the general path
57	jhe	L(ge2)
58
59C un = vn = 1
60	lg	%r1, 0(vp)		C %r1 = vp[0]
61	mlg	%r0, 0(up)		C %r0:%r1 = %r1 * up[0]
62	stg	%r1, 0(rp)		C low limb of the product
63	stg	%r0, 8(rp)		C high limb of the product
64	br	%r14
65
66L(ge2):	C jne	L(gen)
67
68
69L(gen):
70C mul_1 =======================================================================
71
72	stmg	%r6, %r12, 48(%r15)	C save %r6-%r12 starting at 48(%r15)
73	lghi	zero, 0
74	aghi	un, -1			C un = limbs left after the first one
75
76	lg	%r7, 0(vp)		C %r7 = vp[0]
77	lg	%r11, 0(up)		C %r11 = up[0]
78	lghi	%r12, 8			C init index register
79	mlgr	%r10, %r7		C %r10:%r11 = up[0] * vp[0]
80	lgr	%r9, un			C inner loop count
81	stg	%r11, 0(rp)		C rp[0] = low part
82	cr	%r15, %r15		C clear carry flag
83
84L(tm):	lg	%r1, 0(%r12,up)		C next up limb
85	mlgr	%r0, %r7		C %r0:%r1 = up limb * %r7
86	alcgr	%r1, %r10		C add carry limb plus carry flag
87	lgr	%r10, %r0		C copy high part to carry limb
88	stg	%r1, 0(%r12,rp)
89	la	%r12, 8(%r12)		C advance index by one limb
90	brctg	%r9, L(tm)
91
92	alcgr	%r0, zero		C fold the last carry flag into the high part
93	stg	%r0, 0(%r12,rp)		C top limb of this pass
94
95C addmul_1 loop ===============================================================
96
97	aghi	vn, -1			C the first v limb is done
98	je	L(outer_end)		C no v limbs left
99L(outer_loop):
100
101	la	rp, 8(rp)		C rp += 1
102	la	vp, 8(vp)		C vp += 1
103	lg	%r7, 0(vp)		C %r7 = next multiplier limb
104	lg	%r11, 0(up)		C %r11 = up[0]
105	lghi	%r12, 8			C init index register
106	mlgr	%r10, %r7		C %r10:%r11 = up[0] * %r7
107	lgr	%r9, un			C inner loop count
108	alg	%r11, 0(rp)		C add rp[0]; sets the carry flag
109	stg	%r11, 0(rp)
110
111L(tam):	lg	%r1, 0(%r12,up)		C next up limb
112	lg	%r11, 0(%r12,rp)	C rp limb to accumulate into
113	mlgr	%r0, %r7		C %r0:%r1 = up limb * %r7
114	alcgr	%r1, %r11		C low part += rp limb + carry flag
115	alcgr	%r0, zero		C propagate that carry into the high part
116	algr	%r1, %r10		C low part += carry limb; sets the carry flag
117	lgr	%r10, %r0		C high part becomes the next carry limb
118	stg	%r1, 0(%r12,rp)
119	la	%r12, 8(%r12)		C advance index by one limb
120	brctg	%r9, L(tam)
121
122	alcgr	%r0, zero		C fold the last carry flag into the high part
123	stg	%r0, 0(%r12,rp)		C top limb of this pass
124
125	brctg	vn, L(outer_loop)	C one pass per remaining v limb
126L(outer_end):
127
128	lmg	%r6, %r12, 48(%r15)	C restore the registers saved on entry
129	br	%r14
130EPILOGUE()
131