xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/logops_n.asm (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1dnl  AMD64 logops.
2
3dnl  Copyright 2004-2006, 2011, 2012 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C	     cycles/limb
35C AMD K8,K9	 1.5	with fluctuations for variants 2 and 3
36C AMD K10	 1.5	with fluctuations for all variants
37C Intel P4	 2.8/3.35/3.60 (variant1/variant2/variant3)
38C Intel core2	 2
39C Intel NHM	 2
40C Intel SBR	 1.5/1.75/1.75
41C Intel atom	 3.75
42C VIA nano	 3.25
43
C Select the entry point name (func), the code variant and the logical
C instruction (LOGOP) for the operation being built.  The OPERATION_* symbols
C are not defined in this file; presumably the build system defines exactly
C one of them per object.
C
C The three code variants further down compute, for each limb i:
C   VARIANT_1:  rp[i] = up[i] LOGOP vp[i]
C   VARIANT_2:  rp[i] = up[i] LOGOP ~vp[i]
C   VARIANT_3:  rp[i] = ~(up[i] LOGOP vp[i])
44ifdef(`OPERATION_and_n',`
45  define(`func',`mpn_and_n')
46  define(`VARIANT_1')
47  define(`LOGOP',`andq')')
48ifdef(`OPERATION_andn_n',`
49  define(`func',`mpn_andn_n')
50  define(`VARIANT_2')
51  define(`LOGOP',`andq')')
52ifdef(`OPERATION_nand_n',`
53  define(`func',`mpn_nand_n')
54  define(`VARIANT_3')
55  define(`LOGOP',`andq')')
56ifdef(`OPERATION_ior_n',`
57  define(`func',`mpn_ior_n')
58  define(`VARIANT_1')
59  define(`LOGOP',`orq')')
60ifdef(`OPERATION_iorn_n',`
61  define(`func',`mpn_iorn_n')
62  define(`VARIANT_2')
63  define(`LOGOP',`orq')')
64ifdef(`OPERATION_nior_n',`
65  define(`func',`mpn_nior_n')
66  define(`VARIANT_3')
67  define(`LOGOP',`orq')')
68ifdef(`OPERATION_xor_n',`
69  define(`func',`mpn_xor_n')
70  define(`VARIANT_1')
71  define(`LOGOP',`xorq')')
C xnor is built with VARIANT_2 (complemented vp): up ^ ~vp gives the same
C value as ~(up ^ vp).
72ifdef(`OPERATION_xnor_n',`
73  define(`func',`mpn_xnor_n')
74  define(`VARIANT_2')
75  define(`LOGOP',`xorq')')
76
77
C Lists every entry point this file can provide, matching the func names
C selected above.  MULFUNC_PROLOGUE is defined outside this file; presumably
C the build system scans for this line.
78MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
79
80C INPUT PARAMETERS
81define(`rp',`%rdi')		C destination limb pointer (written by the code below)
82define(`up',`%rsi')		C first source limb pointer (read only)
83define(`vp',`%rdx')		C second source limb pointer (read only)
84define(`n',`%rcx')		C number of 8-byte limbs to process
85
C ABI_SUPPORT and ASM_START come from the included configuration; presumably
C they declare that both the DOS64 and STD64 calling conventions are handled
C (FUNC_ENTRY/FUNC_EXIT in each variant) and open the assembly output.
86ABI_SUPPORT(DOS64)
87ABI_SUPPORT(STD64)
88
89ASM_START()
90
C VARIANT_1:  rp[i] = up[i] LOGOP vp[i]  for i = 0 .. n-1, no complement.
C
C Registers: rp = %rdi, up = %rsi, vp = %rdx, n = %rcx (see the defines above).
C Scratch:   %rax (n mod 4), %r8 and %r9 (limbs in flight); flags are clobbered.
C            rp, up, vp and n are all modified.
C FUNC_ENTRY/FUNC_EXIT are defined outside this file (presumably they adapt
C the DOS64 argument registers to the ones named above).
C
C Requires n >= 1: the first vp limb is loaded unconditionally, and n = 0
C would enter L(b00) and process limbs that do not exist.
C
C Scheme: the three pointers are advanced to the end of their operands and n
C is negated, so (ptr,n,8) walks forward as n counts up towards zero.  The
C main loop handles 4 limbs per pass; n mod 4 selects where it is entered.
91ifdef(`VARIANT_1',`
92	TEXT
93	ALIGN(32)
94PROLOGUE(func)
95	FUNC_ENTRY(4)
96	movq	(vp), %r8		C r8 = vp[0], loaded before vp is advanced
97	movl	R32(%rcx), R32(%rax)	C 32-bit copy of n, enough for the mod 4 test
98	leaq	(vp,n,8), vp		C move vp, up and rp to just past the last limb
99	leaq	(up,n,8), up
100	leaq	(rp,n,8), rp
101	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
102	andl	$3, R32(%rax)		C rax = n mod 4
103	je	L(b00)			C n mod 4 = 0
104	cmpl	$2, R32(%rax)
105	jc	L(b01)			C n mod 4 = 1
106	je	L(b10)			C n mod 4 = 2
107
108L(b11):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 on its own
109	movq	%r8, (rp,n,8)
110	decq	n			C bias n so offsets 16 and 24 at L(e11) hit limbs 1 and 2
111	jmp	L(e11)
112L(b10):	addq	$-2, n			C bias n so offsets 16 and 24 at L(e10) hit limbs 0 and 1
113	jmp	L(e10)			C r8 still holds the preloaded vp[0]
114L(b01):	LOGOP	(up,n,8), %r8		C n mod 4 = 1: finish limb 0 on its own
115	movq	%r8, (rp,n,8)
116	incq	n
117	jz	L(ret)			C n was 1, nothing left to do
118
119L(oop):	movq	(vp,n,8), %r8		C main loop: 4 limbs per pass
120L(b00):	movq	8(vp,n,8), %r9		C n mod 4 = 0 enters here with r8 = vp[0]
121	LOGOP	(up,n,8), %r8
122	LOGOP	8(up,n,8), %r9
123	nop				C no architectural effect; presumably scheduling or alignment filler
124	movq	%r8, (rp,n,8)
125	movq	%r9, 8(rp,n,8)
126L(e11):	movq	16(vp,n,8), %r8
127L(e10):	movq	24(vp,n,8), %r9
128	LOGOP	16(up,n,8), %r8
129	LOGOP	24(up,n,8), %r9
130	movq	%r8, 16(rp,n,8)
131	movq	%r9, 24(rp,n,8)
132	addq	$4, n
133	jnc	L(oop)			C carry is set once the negative n wraps up to zero
134L(ret):	FUNC_EXIT()
135	ret
136EPILOGUE()
137')
138
C VARIANT_2:  rp[i] = up[i] LOGOP ~vp[i]  for i = 0 .. n-1.
C Every vp limb is complemented (notq) right after it is loaded and before
C LOGOP is applied.
C
C Registers: rp = %rdi, up = %rsi, vp = %rdx, n = %rcx (see the defines above).
C Scratch:   %rax (n mod 4), %r8 and %r9 (limbs in flight); flags are clobbered.
C            rp, up, vp and n are all modified.
C FUNC_ENTRY/FUNC_EXIT are defined outside this file (presumably they adapt
C the DOS64 argument registers to the ones named above).
C
C Requires n >= 1: the first vp limb is loaded unconditionally, and n = 0
C would enter L(b00) and process limbs that do not exist.
C
C Scheme: the three pointers are advanced to the end of their operands and n
C is negated, so (ptr,n,8) walks forward as n counts up towards zero.  The
C main loop handles 4 limbs per pass; n mod 4 selects where it is entered.
139ifdef(`VARIANT_2',`
140	TEXT
141	ALIGN(32)
142PROLOGUE(func)
143	FUNC_ENTRY(4)
144	movq	(vp), %r8		C r8 = vp[0], loaded before vp is advanced
145	notq	%r8			C r8 = ~vp[0], ready for every entry path
146	movl	R32(%rcx), R32(%rax)	C 32-bit copy of n, enough for the mod 4 test
147	leaq	(vp,n,8), vp		C move vp, up and rp to just past the last limb
148	leaq	(up,n,8), up
149	leaq	(rp,n,8), rp
150	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
151	andl	$3, R32(%rax)		C rax = n mod 4
152	je	L(b00)			C n mod 4 = 0
153	cmpl	$2, R32(%rax)
154	jc	L(b01)			C n mod 4 = 1
155	je	L(b10)			C n mod 4 = 2
156
157L(b11):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 on its own
158	movq	%r8, (rp,n,8)
159	decq	n			C bias n so offsets 16 and 24 at L(e11) hit limbs 1 and 2
160	jmp	L(e11)
161L(b10):	addq	$-2, n			C bias n so offsets 16 and 24 at L(e10) hit limbs 0 and 1
162	jmp	L(e10)			C r8 still holds the preloaded ~vp[0]
163	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90	C NOP bytes after an unconditional jmp, never executed; presumably padding to place the loop
164L(b01):	LOGOP	(up,n,8), %r8		C n mod 4 = 1: finish limb 0 on its own
165	movq	%r8, (rp,n,8)
166	incq	n
167	jz	L(ret)			C n was 1, nothing left to do
168
169L(oop):	movq	(vp,n,8), %r8		C main loop: 4 limbs per pass
170	notq	%r8
171L(b00):	movq	8(vp,n,8), %r9		C n mod 4 = 0 enters here with r8 = ~vp[0]
172	notq	%r9
173	LOGOP	(up,n,8), %r8
174	LOGOP	8(up,n,8), %r9
175	movq	%r8, (rp,n,8)
176	movq	%r9, 8(rp,n,8)
177L(e11):	movq	16(vp,n,8), %r8
178	notq	%r8
179L(e10):	movq	24(vp,n,8), %r9		C entered from L(b10) with r8 already complemented
180	notq	%r9
181	LOGOP	16(up,n,8), %r8
182	LOGOP	24(up,n,8), %r9
183	movq	%r8, 16(rp,n,8)
184	movq	%r9, 24(rp,n,8)
185	addq	$4, n
186	jnc	L(oop)			C carry is set once the negative n wraps up to zero
187L(ret):	FUNC_EXIT()
188	ret
189EPILOGUE()
190')
191
C VARIANT_3:  rp[i] = ~(up[i] LOGOP vp[i])  for i = 0 .. n-1.
C Every result limb is complemented (notq) after LOGOP and before the store.
C
C Registers: rp = %rdi, up = %rsi, vp = %rdx, n = %rcx (see the defines above).
C Scratch:   %rax (n mod 4), %r8 and %r9 (limbs in flight); flags are clobbered.
C            rp, up, vp and n are all modified.
C FUNC_ENTRY/FUNC_EXIT are defined outside this file (presumably they adapt
C the DOS64 argument registers to the ones named above).
C
C Requires n >= 1: the first vp limb is loaded unconditionally, and n = 0
C would enter L(b00) and process limbs that do not exist.
C
C Scheme: the three pointers are advanced to the end of their operands and n
C is negated, so (ptr,n,8) walks forward as n counts up towards zero.  The
C main loop handles 4 limbs per pass; n mod 4 selects where it is entered.
192ifdef(`VARIANT_3',`
193	TEXT
194	ALIGN(32)
195PROLOGUE(func)
196	FUNC_ENTRY(4)
197	movq	(vp), %r8		C r8 = vp[0], loaded before vp is advanced
198	movl	R32(%rcx), R32(%rax)	C 32-bit copy of n, enough for the mod 4 test
199	leaq	(vp,n,8), vp		C move vp, up and rp to just past the last limb
200	leaq	(up,n,8), up
201	leaq	(rp,n,8), rp
202	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
203	andl	$3, R32(%rax)		C rax = n mod 4
204	je	L(b00)			C n mod 4 = 0
205	cmpl	$2, R32(%rax)
206	jc	L(b01)			C n mod 4 = 1
207	je	L(b10)			C n mod 4 = 2
208
209L(b11):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 on its own
210	notq	%r8
211	movq	%r8, (rp,n,8)
212	decq	n			C bias n so offsets 16 and 24 at L(e11) hit limbs 1 and 2
213	jmp	L(e11)
214L(b10):	addq	$-2, n			C bias n so offsets 16 and 24 at L(e10) hit limbs 0 and 1
215	jmp	L(e10)			C r8 still holds the preloaded vp[0]
216	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90	C NOP bytes after an unconditional jmp, never executed; presumably padding to place the loop
217L(b01):	LOGOP	(up,n,8), %r8		C n mod 4 = 1: finish limb 0 on its own
218	notq	%r8
219	movq	%r8, (rp,n,8)
220	incq	n
221	jz	L(ret)			C n was 1, nothing left to do
222
223L(oop):	movq	(vp,n,8), %r8		C main loop: 4 limbs per pass
224L(b00):	movq	8(vp,n,8), %r9		C n mod 4 = 0 enters here with r8 = vp[0]
225	LOGOP	(up,n,8), %r8
226	notq	%r8
227	LOGOP	8(up,n,8), %r9
228	notq	%r9
229	movq	%r8, (rp,n,8)
230	movq	%r9, 8(rp,n,8)
231L(e11):	movq	16(vp,n,8), %r8
232L(e10):	movq	24(vp,n,8), %r9
233	LOGOP	16(up,n,8), %r8
234	notq	%r8
235	LOGOP	24(up,n,8), %r9
236	notq	%r9
237	movq	%r8, 16(rp,n,8)
238	movq	%r9, 24(rp,n,8)
239	addq	$4, n
240	jnc	L(oop)			C carry is set once the negative n wraps up to zero
241L(ret):	FUNC_EXIT()
242	ret
243EPILOGUE()
244')
245