xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/logops_n.asm (revision fa28c6faa16e0b00edee7acdcaf4899797043def)
1dnl  AMD64 logops.
2
3dnl  Copyright 2004, 2005, 2006, 2011, 2012 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C	     cycles/limb
24C AMD K8,K9	 1.5	with fluctuations for variants 2 and 3
25C AMD K10	 1.5	with fluctuations for all variants
26C Intel P4	 2.8/3.35/3.60 (variant1/variant2/variant3)
27C Intel core2	 2
28C Intel NHM	 2
29C Intel SBR	 1.5/1.75/1.75
30C Intel atom	 3.75
31C VIA nano	 3.25
32
C Operation selection.  Each case below fires when the matching OPERATION_xxx
C symbol is defined (presumably by the build, one per generated object; that
C machinery is not visible in this file -- TODO confirm).  A case sets:
C   func       the name handed to PROLOGUE
C   VARIANT_n  which of the three code bodies further down is emitted
C   LOGOP      the instruction mnemonic that combines two limbs
C
C With u read through up and v read through vp, the bodies compute:
C   VARIANT_1   r = u LOGOP v            and_n   ior_n   xor_n
C   VARIANT_2   r = u LOGOP (NOT v)      andn_n  iorn_n  xnor_n
C   VARIANT_3   r = NOT (u LOGOP v)      nand_n  nior_n
33ifdef(`OPERATION_and_n',`
34  define(`func',`mpn_and_n')
35  define(`VARIANT_1')
36  define(`LOGOP',`andq')')
37ifdef(`OPERATION_andn_n',`
38  define(`func',`mpn_andn_n')
39  define(`VARIANT_2')
40  define(`LOGOP',`andq')')
41ifdef(`OPERATION_nand_n',`
42  define(`func',`mpn_nand_n')
43  define(`VARIANT_3')
44  define(`LOGOP',`andq')')
45ifdef(`OPERATION_ior_n',`
46  define(`func',`mpn_ior_n')
47  define(`VARIANT_1')
48  define(`LOGOP',`orq')')
49ifdef(`OPERATION_iorn_n',`
50  define(`func',`mpn_iorn_n')
51  define(`VARIANT_2')
52  define(`LOGOP',`orq')')
53ifdef(`OPERATION_nior_n',`
54  define(`func',`mpn_nior_n')
55  define(`VARIANT_3')
56  define(`LOGOP',`orq')')
57ifdef(`OPERATION_xor_n',`
58  define(`func',`mpn_xor_n')
59  define(`VARIANT_1')
60  define(`LOGOP',`xorq')')
61ifdef(`OPERATION_xnor_n',`
62  define(`func',`mpn_xnor_n')
63  define(`VARIANT_2')
64  define(`LOGOP',`xorq')')
65
66
C Names every entry point this single source can produce.  The macro comes
C from the included m4 definitions; presumably it is a marker consumed by the
C build rather than something that emits code -- TODO confirm.
67MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
68
69C INPUT PARAMETERS
C Roles below are as used by the code bodies in this file.
70define(`rp',`%rdi')		C result limbs are stored through this pointer
71define(`up',`%rsi')		C first source, used as the memory operand of LOGOP
72define(`vp',`%rdx')		C second source, loaded into a register first
73define(`n',`%rcx')		C limb count, later negated and used as the index
74
C Presumably declares that both calling conventions are handled (the bodies
C start with FUNC_ENTRY and end with FUNC_EXIT) -- TODO confirm in config.m4.
75ABI_SUPPORT(DOS64)
76ABI_SUPPORT(STD64)
77
78ASM_START()
79
C VARIANT_1:  rp[i] = up[i] LOGOP vp[i]   for i = 0 .. n-1
C
C Register use in this body:
C   rp, up, vp   advanced by 8*n bytes, then addressed with a negative index
C   n            negated limb count, the index that counts up towards zero
C   %rax         low 32 bits of the count, reduced to n mod 4 for dispatch
C   %r8, %r9     limb temporaries
C
C The loop handles four limbs per pass.  For n mod 4 = 1, 2 or 3 the b01, b10
C and b11 stubs bias the index and enter the loop part way through, so the
C leftover limbs are done first.  The first vp limb is loaded before n is
C looked at.  NOTE(review): n = 0 is not special-cased; presumably callers
C guarantee n >= 1 -- confirm.
80ifdef(`VARIANT_1',`
81	TEXT
82	ALIGN(32)
83PROLOGUE(func)
84	FUNC_ENTRY(4)
85	movq	(vp), %r8		C r8 = first vp limb, fetched before vp is advanced
86	movl	R32(%rcx), R32(%rax)	C copy low 32 bits of the count for the mod 4 test
87	leaq	(vp,n,8), vp		C vp += 8*n
88	leaq	(up,n,8), up		C up += 8*n
89	leaq	(rp,n,8), rp		C rp += 8*n
90	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
91	andl	$3, R32(%rax)		C rax = n mod 4
92	je	L(b00)			C 0: enter the loop with r8 already loaded
93	cmpl	$2, R32(%rax)
94	jc	L(b01)			C 1
95	je	L(b10)			C 2, otherwise fall through for 3
96
97L(b11):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 here
98	movq	%r8, (rp,n,8)
99	decq	n			C bias index so that 16(ptr,n,8) is limb 1
100	jmp	L(e11)			C second half of the loop body does limbs 1 and 2
101L(b10):	addq	$-2, n			C n mod 4 = 2: bias index so that 16(ptr,n,8) is limb 0
102	jmp	L(e10)			C r8 still holds the first vp limb
103L(b01):	LOGOP	(up,n,8), %r8		C n mod 4 = 1: finish limb 0 here
104	movq	%r8, (rp,n,8)
105	incq	n
106	jz	L(ret)			C that was the only limb
107
108L(oop):	movq	(vp,n,8), %r8
109L(b00):	movq	8(vp,n,8), %r9		C entry for n mod 4 = 0, skips the r8 load above
110	LOGOP	(up,n,8), %r8
111	LOGOP	8(up,n,8), %r9
112	nop				C no effect on the result; presumably alignment or scheduling filler
113	movq	%r8, (rp,n,8)
114	movq	%r9, 8(rp,n,8)
115L(e11):	movq	16(vp,n,8), %r8
116L(e10):	movq	24(vp,n,8), %r9
117	LOGOP	16(up,n,8), %r8
118	LOGOP	24(up,n,8), %r9
119	movq	%r8, 16(rp,n,8)
120	movq	%r9, 24(rp,n,8)
121	addq	$4, n
122	jnc	L(oop)			C carry is set once the negative index reaches zero
123L(ret):	FUNC_EXIT()
124	ret
125EPILOGUE()
126')
127
C VARIANT_2:  rp[i] = up[i] LOGOP (NOT vp[i])   for i = 0 .. n-1
C
C Every vp limb is loaded into a register and complemented with notq before
C LOGOP combines it with the up limb from memory.
C
C Register use in this body:
C   rp, up, vp   advanced by 8*n bytes, then addressed with a negative index
C   n            negated limb count, the index that counts up towards zero
C   %rax         low 32 bits of the count, reduced to n mod 4 for dispatch
C   %r8, %r9     limb temporaries
C
C The loop handles four limbs per pass; the b01, b10 and b11 stubs bias the
C index and enter the loop part way through for n mod 4 = 1, 2 or 3.
C NOTE(review): n = 0 is not special-cased; presumably callers guarantee
C n >= 1 -- confirm.
128ifdef(`VARIANT_2',`
129	TEXT
130	ALIGN(32)
131PROLOGUE(func)
132	FUNC_ENTRY(4)
133	movq	(vp), %r8		C r8 = first vp limb, fetched before vp is advanced
134	notq	%r8			C complemented here, so every entry point sees NOT v
135	movl	R32(%rcx), R32(%rax)	C copy low 32 bits of the count for the mod 4 test
136	leaq	(vp,n,8), vp		C vp += 8*n
137	leaq	(up,n,8), up		C up += 8*n
138	leaq	(rp,n,8), rp		C rp += 8*n
139	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
140	andl	$3, R32(%rax)		C rax = n mod 4
141	je	L(b00)			C 0: enter the loop with r8 already prepared
142	cmpl	$2, R32(%rax)
143	jc	L(b01)			C 1
144	je	L(b10)			C 2, otherwise fall through for 3
145
146L(b11):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 here
147	movq	%r8, (rp,n,8)
148	decq	n			C bias index so that 16(ptr,n,8) is limb 1
149	jmp	L(e11)			C second half of the loop body does limbs 1 and 2
150L(b10):	addq	$-2, n			C n mod 4 = 2: bias index so that 16(ptr,n,8) is limb 0
151	jmp	L(e10)			C r8 still holds the complemented first vp limb
C 0x90 is the one-byte x86 NOP.  These bytes follow an unconditional jmp and
C carry no label, so they are never executed; presumably they only pad the
C code that follows to a preferred alignment.
152	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
153L(b01):	LOGOP	(up,n,8), %r8		C n mod 4 = 1: finish limb 0 here
154	movq	%r8, (rp,n,8)
155	incq	n
156	jz	L(ret)			C that was the only limb
157
158L(oop):	movq	(vp,n,8), %r8
159	notq	%r8
160L(b00):	movq	8(vp,n,8), %r9		C entry for n mod 4 = 0, skips the r8 load and notq above
161	notq	%r9
162	LOGOP	(up,n,8), %r8
163	LOGOP	8(up,n,8), %r9
164	movq	%r8, (rp,n,8)
165	movq	%r9, 8(rp,n,8)
166L(e11):	movq	16(vp,n,8), %r8
167	notq	%r8
168L(e10):	movq	24(vp,n,8), %r9		C b10 enters here, after the r8 load and notq
169	notq	%r9
170	LOGOP	16(up,n,8), %r8
171	LOGOP	24(up,n,8), %r9
172	movq	%r8, 16(rp,n,8)
173	movq	%r9, 24(rp,n,8)
174	addq	$4, n
175	jnc	L(oop)			C carry is set once the negative index reaches zero
176L(ret):	FUNC_EXIT()
177	ret
178EPILOGUE()
179')
180
C VARIANT_3:  rp[i] = NOT (up[i] LOGOP vp[i])   for i = 0 .. n-1
C
C Each vp limb is loaded into a register, combined with the up limb from
C memory by LOGOP, and the result is complemented with notq before the store.
C
C Register use in this body:
C   rp, up, vp   advanced by 8*n bytes, then addressed with a negative index
C   n            negated limb count, the index that counts up towards zero
C   %rax         low 32 bits of the count, reduced to n mod 4 for dispatch
C   %r8, %r9     limb temporaries
C
C The loop handles four limbs per pass; the b01, b10 and b11 stubs bias the
C index and enter the loop part way through for n mod 4 = 1, 2 or 3.
C NOTE(review): n = 0 is not special-cased; presumably callers guarantee
C n >= 1 -- confirm.
181ifdef(`VARIANT_3',`
182	TEXT
183	ALIGN(32)
184PROLOGUE(func)
185	FUNC_ENTRY(4)
186	movq	(vp), %r8		C r8 = first vp limb, fetched before vp is advanced
187	movl	R32(%rcx), R32(%rax)	C copy low 32 bits of the count for the mod 4 test
188	leaq	(vp,n,8), vp		C vp += 8*n
189	leaq	(up,n,8), up		C up += 8*n
190	leaq	(rp,n,8), rp		C rp += 8*n
191	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
192	andl	$3, R32(%rax)		C rax = n mod 4
193	je	L(b00)			C 0: enter the loop with r8 already loaded
194	cmpl	$2, R32(%rax)
195	jc	L(b01)			C 1
196	je	L(b10)			C 2, otherwise fall through for 3
197
198L(b11):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 here
199	notq	%r8
200	movq	%r8, (rp,n,8)
201	decq	n			C bias index so that 16(ptr,n,8) is limb 1
202	jmp	L(e11)			C second half of the loop body does limbs 1 and 2
203L(b10):	addq	$-2, n			C n mod 4 = 2: bias index so that 16(ptr,n,8) is limb 0
204	jmp	L(e10)			C r8 still holds the first vp limb
C 0x90 is the one-byte x86 NOP.  These bytes follow an unconditional jmp and
C carry no label, so they are never executed; presumably they only pad the
C code that follows to a preferred alignment.
205	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
206L(b01):	LOGOP	(up,n,8), %r8		C n mod 4 = 1: finish limb 0 here
207	notq	%r8
208	movq	%r8, (rp,n,8)
209	incq	n
210	jz	L(ret)			C that was the only limb
211
212L(oop):	movq	(vp,n,8), %r8
213L(b00):	movq	8(vp,n,8), %r9		C entry for n mod 4 = 0, skips the r8 load above
214	LOGOP	(up,n,8), %r8
215	notq	%r8
216	LOGOP	8(up,n,8), %r9
217	notq	%r9
218	movq	%r8, (rp,n,8)
219	movq	%r9, 8(rp,n,8)
220L(e11):	movq	16(vp,n,8), %r8
221L(e10):	movq	24(vp,n,8), %r9
222	LOGOP	16(up,n,8), %r8
223	notq	%r8
224	LOGOP	24(up,n,8), %r9
225	notq	%r9
226	movq	%r8, 16(rp,n,8)
227	movq	%r9, 24(rp,n,8)
228	addq	$4, n
229	jnc	L(oop)			C carry is set once the negative index reaches zero
230L(ret):	FUNC_EXIT()
231	ret
232EPILOGUE()
233')
234