dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium/mul_1.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
dnl  Intel Pentium mpn_mul_1 -- mpn by limb multiplication.

dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 12.0 cycles/limb


C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                      mp_limb_t multiplier);
C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                       mp_limb_t multiplier, mp_limb_t carry);
C
C Both multiply the size limbs at src by multiplier, store size limbs at
C dst and return one limb (the carry out of the top).  mpn_mul_1c also
C takes an initial carry limb which is added in at the low end.

C Parameter offsets, listed from the last argument down to the first
C (carry at 20 down to dst at 4, in steps of 4).
C NOTE(review): presumably these are byte offsets from the stack pointer
C at function entry, with the PARAM_ macros compensating for later pushes
C through FRAME -- confirm against defframe in the m4 support files.

defframe(PARAM_CARRY,     20)
defframe(PARAM_MULTIPLIER,16)
defframe(PARAM_SIZE,      12)
defframe(PARAM_SRC,       8)
defframe(PARAM_DST,       4)

50	TEXT
51	ALIGN(8)
52PROLOGUE(mpn_mul_1c)
53deflit(`FRAME',0)
54
55	movl	PARAM_CARRY, %ecx
56	pushl	%esi		FRAME_pushl()
57
58	jmp	L(start_1c)
59
60EPILOGUE()
61
62
63	ALIGN(8)
64PROLOGUE(mpn_mul_1)
65deflit(`FRAME',0)
66
67	xorl	%ecx, %ecx
68	pushl	%esi		FRAME_pushl()
69
70L(start_1c):
71	movl	PARAM_SRC, %esi
72	movl	PARAM_SIZE, %eax
73
74	shrl	%eax
75	jnz	L(two_or_more)
76
77
78	C one limb only
79
80	movl	(%esi), %eax
81
82	mull	PARAM_MULTIPLIER
83
84	addl	%eax, %ecx
85	movl	PARAM_DST, %eax
86
87	adcl	$0, %edx
88	popl	%esi
89
90	movl	%ecx, (%eax)
91	movl	%edx, %eax
92
93	ret
94
95
96L(two_or_more):
97	C eax	size/2
98	C ebx
99	C ecx	carry
100	C edx
101	C esi	src
102	C edi
103	C ebp
104
105	pushl	%edi		FRAME_pushl()
106	pushl	%ebx		FRAME_pushl()
107
108	movl	PARAM_DST, %edi
109	leal	-1(%eax), %ebx		C size/2-1
110
111	notl	%ebx			C -size, preserve carry
112
113	leal	(%esi,%eax,8), %esi	C src end
114	leal	(%edi,%eax,8), %edi	C dst end
115
116	pushl	%ebp		FRAME_pushl()
117	jnc	L(top)
118
119
120	C size was odd, process one limb separately
121
122	movl	(%esi,%ebx,8), %eax
123	addl	$4, %esi
124
125	mull	PARAM_MULTIPLIER
126
127	addl	%ecx, %eax
128	movl	%edx, %ecx
129
130	movl	%eax, (%edi,%ebx,8)
131	leal	4(%edi), %edi
132
133
134L(top):
135	C eax
136	C ebx	counter, negative
137	C ecx	carry
138	C edx
139	C esi	src end
140	C edi	dst end
141	C ebp
142
143	adcl	$0, %ecx
144	movl	(%esi,%ebx,8), %eax
145
146	mull	PARAM_MULTIPLIER
147
148	movl	%edx, %ebp
149	addl	%eax, %ecx
150
151	adcl	$0, %ebp
152	movl	4(%esi,%ebx,8), %eax
153
154	mull	PARAM_MULTIPLIER
155
156	movl	%ecx, (%edi,%ebx,8)
157	addl	%ebp, %eax
158
159	movl	%eax, 4(%edi,%ebx,8)
160	incl	%ebx
161
162	movl	%edx, %ecx
163	jnz	L(top)
164
165
166	adcl	$0, %ecx
167	popl	%ebp
168
169	movl	%ecx, %eax
170	popl	%ebx
171
172	popl	%edi
173	popl	%esi
174
175	ret
176
177EPILOGUE()
178