xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium/mod_34lsub1.asm (revision 2dd295436a0082eb4f8d294f4aa73c223413d0f2)
1dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
2
3dnl  Copyright 2000-2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C P5: 1.66 cycles/limb
35
36
37C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
38C
C Returns in eax a value congruent, modulo 2^24-1, to the number formed by
C the size 32-bit limbs at src (least significant limb first).  The value
C is not reduced to the range 0..2^24-2; for instance a single limb is
C returned unchanged.
C
C Method: 2^24 == 1 mod 2^24-1, hence 2^32 == 2^8, 2^64 == 2^16 and
C 2^96 == 1, so limbs three apart have the same weight.  Limbs are summed
C into three accumulators selected by limb index mod 3, with the carry out
C of each addition chained into the next one.  At the end each accumulator
C is split at the bit position where its weight wraps round to 1 and the
C pieces are added together, along with any carry still pending.
C
C size is assumed to be at least 1.  A size of 0 is not rejected: it takes
C the one-limb path and reads src[0].  TODO confirm callers never pass 0.
C
C Registers: eax, ecx and edx are overwritten.  ebx, esi, edi and ebp are
C pushed and popped on the three-or-more path and not touched otherwise.
39
C Parameter slots.  NOTE(review): defframe is defined outside this file;
C presumably these are byte offsets from esp at entry, corrected by FRAME.
40defframe(PARAM_SIZE, 8)
41defframe(PARAM_SRC,  4)
42
43	TEXT
44	ALIGN(16)
45PROLOGUE(mpn_mod_34lsub1)
46deflit(`FRAME',0)
47
48	movl	PARAM_SIZE, %ecx
49	movl	PARAM_SRC, %edx
50
51	subl	$2, %ecx		C ecx = size-2, flags select the path
52	ja	L(three_or_more)	C size >= 3
53
54	movl	(%edx), %eax		C eax = src[0], flags from subl kept
55	jne	L(one)			C size != 2, return src[0] as it is
56
57
	C Exactly two limbs.  Fold src[0] at bit 24 and src[1] at bit 16,
	C since src[1] has weight 2^32 == 2^8.

58	movl	4(%edx), %ecx		C ecx = src[1]
59	movl	%eax, %edx		C edx = src[0]
60
61	shrl	$24, %edx		C src[0] bits 24-31, weight 2^24 == 1
62	andl	$0xFFFFFF, %eax		C src[0] bits 0-23
63
64	addl	%edx, %eax
65	movl	%ecx, %edx		C edx = src[1]
66
67	shrl	$16, %ecx		C src[1] bits 16-31, weight 2^48 == 1
68	andl	$0xFFFF, %edx		C src[1] bits 0-15
69
70	shll	$8, %edx		C src[1] low part, weight 2^8
71	addl	%ecx, %eax
72
73	addl	%edx, %eax
74
75L(one):
76	ret
77
78
79L(three_or_more):
80	C eax
81	C ebx
82	C ecx	size-2
83	C edx	src
84	C esi
85	C edi
86	C ebp
87
	C NOTE(review): FRAME_pushl is defined outside this file; presumably
	C it keeps FRAME in step with the pushes.

88	pushl	%ebx	FRAME_pushl()
89	pushl	%esi	FRAME_pushl()
90
91	pushl	%edi	FRAME_pushl()
92	pushl	%ebp	FRAME_pushl()
93
94	xorl	%esi, %esi		C 0mod3
95	xorl	%edi, %edi		C 1mod3
96
97	xorl	%ebp, %ebp		C 2mod3, and clear carry
98
	C Each pass adds three limbs, one to each accumulator.  The carry
	C flag links every adcl to the next one, including from the last
	C adcl of one pass to the first adcl of the following pass, so no
	C instruction placed between two adcl may modify the carry flag.

99L(top):
100	C eax	scratch
101	C ebx	scratch
102	C ecx	counter, limbs
103	C edx	src
104	C esi	0mod3
105	C edi	1mod3
106	C ebp	2mod3
107
108	movl	(%edx), %eax
109	movl	4(%edx), %ebx
110
111	adcl	%eax, %esi
112	movl	8(%edx), %eax
113
114	adcl	%ebx, %edi
115	leal	12(%edx), %edx		C src += 3 limbs, flags untouched
116
117	adcl	%eax, %ebp
118	leal	-2(%ecx), %ecx		C with the decl below, counter -= 3
119
120	decl	%ecx			C decl leaves the carry flag alone
121	jg	L(top)			C loop while 3 or more limbs remain
122
123
124	C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively

	C ebx is set to minus the weight of the carry left pending after
	C the last adcl: -1 if that was into 2mod3, -2^8 if into 0mod3,
	C -2^16 if into 1mod3.  incl and decl are used for the tests as
	C they set the sign flag but leave the carry flag alone.

125
126	movl	$0xFFFFFFFF, %ebx	C mask
127	incl	%ecx
128
129	js	L(combine)		C 0 more
130
131	movl	(%edx), %eax
132	movl	$0xFFFFFF00, %ebx
133
134	adcl	%eax, %esi
135	decl	%ecx
136
137	js	L(combine)		C 1 more
138
139	movl	4(%edx), %eax
140	movl	$0xFFFF0000, %ebx
141
142	adcl	%eax, %edi
143
144
145
146L(combine):
147	C eax
148	C ebx	mask
149	C ecx
150	C edx
151	C esi	0mod3
152	C edi	1mod3
153	C ebp	2mod3
154
	C The carry flag still holds the carry out of the last adcl.
	C sbbl turns it into 0 or -1, the mask turns -1 into minus the
	C weight of that carry, and subtracting adds the weight to eax.
	C 0mod3 is split at bit 24, 1mod3 at bit 16 and 2mod3 at bit 8;
	C each high part has weight 1, the low parts 1, 2^8 and 2^16.

155	sbbl	%ecx, %ecx		C carry
156	movl	%esi, %eax		C 0mod3
157
158	andl	%ebx, %ecx		C masked for position
159	andl	$0xFFFFFF, %eax		C 0mod3 low
160
161	shrl	$24, %esi		C 0mod3 high
162	subl	%ecx, %eax		C apply carry
163
164	addl	%esi, %eax		C apply 0mod3
165	movl	%edi, %ebx		C 1mod3
166
167	shrl	$16, %edi		C 1mod3 high
168	andl	$0x0000FFFF, %ebx
169
170	shll	$8, %ebx		C 1mod3 low
171	addl	%edi, %eax		C apply 1mod3 high
172
173	addl	%ebx, %eax		C apply 1mod3 low
174	movl	%ebp, %ebx		C 2mod3
175
176	shrl	$8, %ebp		C 2mod3 high
177	andl	$0xFF, %ebx
178
179	shll	$16, %ebx		C 2mod3 low
180	addl	%ebp, %eax		C apply 2mod3 high
181
182	addl	%ebx, %eax		C apply 2mod3 low
183
	C Restore the saved registers in reverse order of the pushes.

184	popl	%ebp
185	popl	%edi
186
187	popl	%esi
188	popl	%ebx
189
190	ret
191
192EPILOGUE()
193