xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/ia64/logops_n.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3
4dnl  Contributed to the GNU project by Torbjorn Granlund.
5
6dnl  Copyright 2003-2005 Free Software Foundation, Inc.
7
8dnl  This file is part of the GNU MP Library.
9dnl
10dnl  The GNU MP Library is free software; you can redistribute it and/or modify
11dnl  it under the terms of either:
12dnl
13dnl    * the GNU Lesser General Public License as published by the Free
14dnl      Software Foundation; either version 3 of the License, or (at your
15dnl      option) any later version.
16dnl
17dnl  or
18dnl
19dnl    * the GNU General Public License as published by the Free Software
20dnl      Foundation; either version 2 of the License, or (at your option) any
21dnl      later version.
22dnl
23dnl  or both in parallel, as here.
24dnl
25dnl  The GNU MP Library is distributed in the hope that it will be useful, but
26dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28dnl  for more details.
29dnl
30dnl  You should have received copies of the GNU General Public License and the
31dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
32dnl  see https://www.gnu.org/licenses/.
33
34include(`../config.m4')
35
36C           cycles/limb
37C Itanium:      2
38C Itanium 2:    1
39
40C TODO
41C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
42C    wind-down code).
43
44C INPUT PARAMETERS
C Argument registers, named by how the function body uses them:
C   rp  address the result limbs are stored to (st8, post-increment 8)
C   up  address the first source operand is loaded from (ld8, post-increment 8)
C   vp  address the second source operand is loaded from (ld8, post-increment 8)
C   n   number of 8-byte limbs to process
45define(`rp', `r32')
46define(`up', `r33')
47define(`vp', `r34')
48define(`n', `r35')
49
C Operation selection.  Each OPERATION_xxx branch below defines three macros:
C   func            name of the function being built
C   logop(d,a,b)    the two-operand instruction applied to one limb pair
C   notormov(d,s)   final step: either mov, which copies s to d, or
C                   sub d = -1, s, which puts the bitwise complement of s in d
C The function body calls logop with a = limb from up and b = limb from vp.
C Writing u and v for those limbs, the value stored per operation is:
C   and_n   u AND v            nand_n  NOT (u AND v)
C   andn_n  u AND NOT v        iorn_n  NOT (v AND NOT u), i.e. u OR NOT v
C   ior_n   u OR v             nior_n  NOT (u OR v)
C   xor_n   u XOR v            xnor_n  NOT (u XOR v)
C Note that the iorn_n branch is the only one that swaps the logop operands.
C Presumably the build defines exactly one OPERATION_xxx symbol (TODO confirm);
C nothing in this file supplies a default if none is defined.
50ifdef(`OPERATION_and_n',
51`	define(`func',`mpn_and_n')
52	define(`logop',		`and	$1 = $2, $3')
53	define(`notormov',	`mov	$1 = $2')')
54ifdef(`OPERATION_andn_n',
55`	define(`func',`mpn_andn_n')
56	define(`logop',		`andcm	$1 = $2, $3')
57	define(`notormov',	`mov	$1 = $2')')
58ifdef(`OPERATION_nand_n',
59`	define(`func',`mpn_nand_n')
60	define(`logop',		`and	$1 = $2, $3')
61	define(`notormov',	`sub	$1 = -1, $2')')
62ifdef(`OPERATION_ior_n',
63`	define(`func',`mpn_ior_n')
64	define(`logop',		`or	$1 = $2, $3')
65	define(`notormov',	`mov	$1 = $2')')
66ifdef(`OPERATION_iorn_n',
67`	define(`func',`mpn_iorn_n')
68	define(`logop',		`andcm	$1 = $3, $2')
69	define(`notormov',	`sub	$1 = -1, $2')')
70ifdef(`OPERATION_nior_n',
71`	define(`func',`mpn_nior_n')
72	define(`logop',		`or	$1 = $2, $3')
73	define(`notormov',	`sub	$1 = -1, $2')')
74ifdef(`OPERATION_xor_n',
75`	define(`func',`mpn_xor_n')
76	define(`logop',		`xor	$1 = $2, $3')
77	define(`notormov',	`mov	$1 = $2')')
78ifdef(`OPERATION_xnor_n',
79`	define(`func',`mpn_xnor_n')
80	define(`logop',		`xor	$1 = $2, $3')
81	define(`notormov',	`sub	$1 = -1, $2')')
82
C The eight function names this file can be built as.  Presumably consumed by
C the build machinery, which is not visible in this file (TODO confirm).
83MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
84
85ASM_START()
C func(rp, up, vp, n)
C
C For each of the n limbs: load one 8-byte limb from up and one from vp,
C combine them with logop, pass the result through notormov, and store it
C to rp.  All three pointers advance by 8 bytes per limb.
C
C n must be at least 1.  The first limb pair is loaded before n is examined,
C and a zero count would take the n mod 4 = 0 path and store four limbs.
C
C Register use:
C   r2        saved copy of ar.lc, restored just before returning
C   r10, r11  first limb pair, loaded from up and vp on entry
C   r16-r19   limbs loaded from up
C   r20-r23   limbs loaded from vp
C   r14, r15  logop results, used alternately (r14 also briefly holds n AND 3)
C   r8, r9    notormov results waiting to be stored, used alternately
C   p15       set when n > 4, i.e. when more than the short path is needed
C   p6,p7,p8  set when n mod 4 is 1, 2, 3 respectively
C   ar.lc     loop counter consumed by br.cloop
C
C Layout: four feed-in blocks (.Lb00, .Lb01, .Lb10, .Lb11, one per value of
C n mod 4), a main loop handling four limbs per pass, and a wind-down chain
C (.Lcj6 down to .Lcj1) that stores the results still in flight.
86PROLOGUE(func)
87	.prologue
88	.save	ar.lc, r2
89	.body
C 32-bit ABI only: widen the three pointer arguments with addp4 and
C zero-extend n from 32 bits.
90ifdef(`HAVE_ABI_32',
91`	addp4	rp = 0, rp			C			M I
92	addp4	up = 0, up			C			M I
93	addp4	vp = 0, vp			C			M I
94	nop.m		0
95	nop.m		0
96	zxt4	n = n				C			I
97	;;
98')
C Load the first limb pair and save ar.lc so it can be restored on exit.
99{.mmi
100	ld8		r10 = [up], 8		C			M
101	ld8		r11 = [vp], 8		C			M
102	mov.i		r2 = ar.lc		C			I0
103}
C r14 = n mod 4, p15 = (n > 4), then n is replaced by n / 4.
104{.mmi
105	and		r14 = 3, n		C			M I
106	cmp.lt		p15, p14 = 4, n		C			M I
107	shr.u		n = n, 2		C			I0
108	;;
109}
C Dispatch on n mod 4.  The case n mod 4 = 0 falls through to .Lb00.
110{.mmi
111	cmp.eq		p6, p0 = 1, r14		C			M I
112	cmp.eq		p7, p0 = 2, r14		C			M I
113	cmp.eq		p8, p0 = 3, r14		C			M I
114}
115{.bbb
116   (p6)	br.dptk		.Lb01			C			B
117   (p7)	br.dptk		.Lb10			C			B
118   (p8)	br.dptk		.Lb11			C			B
119}
120
C Feed-in for n mod 4 = 0.  Loads three more limb pairs and sets n to
C n/4 - 2.  With n = 4 (p15 clear) the first two logops are done here and the
C remaining work is left to the wind-down at .Lcj4.
121.Lb00:	ld8		r17 = [up], 8		C			M
122	ld8		r21 = [vp], 8		C			M
123	add		n = -2, n		C			M I
124	;;
125	ld8		r18 = [up], 8		C			M
126	ld8		r22 = [vp], 8		C			M
127	;;
128	ld8		r19 = [up], 8		C			M
129	ld8		r23 = [vp], 8		C			M
130  (p15)	br.cond.dpnt	.grt4			C			B
131
132	logop(		r14, r10, r11)		C			M I
133	;;
134	logop(		r15, r17, r21)		C			M I
135	notormov(	r8, r14)		C			M I
136	br		.Lcj4			C			B
137
C n mod 4 = 0 and n > 4: load two more limb pairs, set the loop count and
C enter the main loop at .LL00.
138.grt4:	logop(		r14, r10, r11)		C			M I
139	ld8		r16 = [up], 8		C			M
140	ld8		r20 = [vp], 8		C			M
141	;;
142	logop(		r15, r17, r21)		C			M I
143	ld8		r17 = [up], 8		C			M
144	mov.i		ar.lc = n		C			I0
145	notormov(	r8, r14)		C			M I
146	ld8		r21 = [vp], 8		C			M
147	br		.LL00			C			B
148
C Feed-in for n mod 4 = 1.  Sets n to n/4 - 1.  With n = 1 (p15 clear) the
C single result is finished here and stored at .Lcj1.
149.Lb01:	add		n = -1, n		C			M I
150	logop(		r15, r10, r11)		C			M I
151  (p15)	br.cond.dpnt	.grt1			C			B
152	;;
153
154	notormov(	r9, r15)		C			M I
155	br		.Lcj1			C			B
156
C n mod 4 = 1 and n > 4: load four more limb pairs.  The br.cloop below is
C taken when more than five limbs remain to be handled (.grt5, which enters
C the loop at .LL01); otherwise the five results are finished from .Lcj5.
157.grt1:	ld8		r16 = [up], 8		C			M
158	ld8		r20 = [vp], 8		C			M
159	;;
160	ld8		r17 = [up], 8		C			M
161	ld8		r21 = [vp], 8		C			M
162	mov.i		ar.lc = n		C			I0
163	;;
164	ld8		r18 = [up], 8		C			M
165	ld8		r22 = [vp], 8		C			M
166	;;
167	ld8		r19 = [up], 8		C			M
168	ld8		r23 = [vp], 8		C			M
169	br.cloop.dptk	.grt5			C			B
170	;;
171
172	logop(		r14, r16, r20)		C			M I
173	notormov(	r9, r15)		C			M I
174	br		.Lcj5			C			B
175
176.grt5:	logop(		r14, r16, r20)		C			M I
177	ld8		r16 = [up], 8		C			M
178	notormov(	r9, r15)		C			M I
179	ld8		r20 = [vp], 8		C			M
180	br		.LL01			C			B
181
C Feed-in for n mod 4 = 2.  Loads one more limb pair.  With n = 2 (p15 clear)
C both logops are done here and the two stores happen from .Lcj2.
182.Lb10:	ld8		r19 = [up], 8		C			M
183	ld8		r23 = [vp], 8		C			M
184  (p15)	br.cond.dpnt	.grt2			C			B
185
186	logop(		r14, r10, r11)		C			M I
187	;;
188	logop(		r15, r19, r23)		C			M I
189	notormov(	r8, r14)		C			M I
190	br		.Lcj2			C			B
191
C n mod 4 = 2 and n > 4: sets n to n/4 - 1 as the loop count, loads further
C limb pairs, then either enters the loop at its top (.Loop) or, when the
C count is already exhausted, goes to the six-store wind-down at .Lcj6.
192.grt2:	ld8		r16 = [up], 8		C			M
193	ld8		r20 = [vp], 8		C			M
194	add		n = -1, n		C			M I
195	;;
196	ld8		r17 = [up], 8		C			M
197	ld8		r21 = [vp], 8		C			M
198	logop(		r14, r10, r11)		C			M I
199	;;
200	ld8		r18 = [up], 8		C			M
201	ld8		r22 = [vp], 8		C			M
202	mov.i		ar.lc = n		C			I0
203	;;
204	logop(		r15, r19, r23)		C			M I
205	ld8		r19 = [up], 8		C			M
206	notormov(	r8, r14)		C			M I
207	ld8		r23 = [vp], 8		C			M
208	br.cloop.dptk	.Loop			C			B
209	br		.Lcj6			C			B
210
C Feed-in for n mod 4 = 3.  Loads two more limb pairs and sets n to n/4 - 1.
C With n = 3 (p15 clear) the remaining work is left to the wind-down at
C .Lcj3.
211.Lb11:	ld8		r18 = [up], 8		C			M
212	ld8		r22 = [vp], 8		C			M
213	add		n = -1, n		C			M I
214	;;
215	ld8		r19 = [up], 8		C			M
216	ld8		r23 = [vp], 8		C			M
217	logop(		r15, r10, r11)		C			M I
218  (p15)	br.cond.dpnt	.grt3			C			B
219	;;
220
221	logop(		r14, r18, r22)		C			M I
222	notormov(	r9, r15)		C			M I
223	br		.Lcj3			C			B
224
C n mod 4 = 3 and n > 4: load further limb pairs, set the loop count and
C enter the main loop at .LL11.
225.grt3:	ld8		r16 = [up], 8		C			M
226	ld8		r20 = [vp], 8		C			M
227	;;
228	ld8		r17 = [up], 8		C			M
229	ld8		r21 = [vp], 8		C			M
230	mov.i		ar.lc = n		C			I0
231	;;
232	logop(		r14, r18, r22)		C			M I
233	ld8		r18 = [up], 8		C			M
234	notormov(	r9, r15)		C			M I
235	ld8		r22 = [vp], 8		C			M
236	br		.LL11			C			B
237
C Main loop.  One full pass consists of four stages; each stage stores one
C finished result (r8 or r9), runs logop on a limb pair loaded earlier,
C runs notormov on the previous logop result, and loads one new limb pair.
C .LL01, .LL00 and .LL11 are the mid-loop entry points used by the feed-in
C code above.  br.cloop repeats the loop while ar.lc is non-zero.
238C *** MAIN LOOP START ***
239	ALIGN(32)
240.Loop:	st8		[rp] = r8, 8		C			M
241	logop(		r14, r16, r20)		C			M I
242	notormov(	r9, r15)		C			M I
243	ld8		r16 = [up], 8		C			M
244	ld8		r20 = [vp], 8		C			M
245	nop.b		0
246	;;
247.LL01:	st8		[rp] = r9, 8		C			M
248	logop(		r15, r17, r21)		C			M I
249	notormov(	r8, r14)		C			M I
250	ld8		r17 = [up], 8		C			M
251	ld8		r21 = [vp], 8		C			M
252	nop.b		0
253	;;
254.LL00:	st8		[rp] = r8, 8		C			M
255	logop(		r14, r18, r22)		C			M I
256	notormov(	r9, r15)		C			M I
257	ld8		r18 = [up], 8		C			M
258	ld8		r22 = [vp], 8		C			M
259	nop.b		0
260	;;
261.LL11:	st8		[rp] = r9, 8		C			M
262	logop(		r15, r19, r23)		C			M I
263	notormov(	r8, r14)		C			M I
264	ld8		r19 = [up], 8		C			M
265	ld8		r23 = [vp], 8		C			M
266	br.cloop.dptk	.Loop	;;		C			B
267C *** MAIN LOOP END ***
268
C Wind-down.  No more loads are issued; the chain only finishes and stores
C the results still in flight.  Entering at .LcjK performs the last K stores
C (.Lcj6 stores six limbs, .Lcj1 stores one).
269.Lcj6:	st8		[rp] = r8, 8		C			M
270	logop(		r14, r16, r20)		C			M I
271	notormov(	r9, r15)		C			M I
272	;;
273.Lcj5:	st8		[rp] = r9, 8		C			M
274	logop(		r15, r17, r21)		C			M I
275	notormov(	r8, r14)		C			M I
276	;;
277.Lcj4:	st8		[rp] = r8, 8		C			M
278	logop(		r14, r18, r22)		C			M I
279	notormov(	r9, r15)		C			M I
280	;;
281.Lcj3:	st8		[rp] = r9, 8		C			M
282	logop(		r15, r19, r23)		C			M I
283	notormov(	r8, r14)		C			M I
284	;;
285.Lcj2:	st8		[rp] = r8, 8		C			M
286	notormov(	r9, r15)		C			M I
287	;;
C Final store, then restore the ar.lc value saved on entry and return.
288.Lcj1:	st8		[rp] = r9, 8		C			M
289	mov.i		ar.lc = r2		C			I0
290	br.ret.sptk.many b0			C			B
291EPILOGUE()
292ASM_END()
293