xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/ia64/logops_n.asm (revision 63aea4bd5b445e491ff0389fe27ec78b3099dba3)
1dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3
4dnl  Contributed to the GNU project by Torbjorn Granlund.
5
6dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
7dnl
8dnl  This file is part of the GNU MP Library.
9dnl
10dnl  The GNU MP Library is free software; you can redistribute it and/or modify
11dnl  it under the terms of the GNU Lesser General Public License as published
12dnl  by the Free Software Foundation; either version 3 of the License, or (at
13dnl  your option) any later version.
14dnl
15dnl  The GNU MP Library is distributed in the hope that it will be useful, but
16dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18dnl  License for more details.
19dnl
20dnl  You should have received a copy of the GNU Lesser General Public License
21dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
22
23include(`../config.m4')
24
25C           cycles/limb
26C Itanium:      2
27C Itanium 2:    1
28
29C TODO
30C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
31C    wind-down code).
32
33C INPUT PARAMETERS
C   rp  destination pointer; every result limb is written through it with
C       st8 and a post-increment of 8 bytes
C   up  first source pointer; limbs are read with ld8 and a post-increment
C       of 8 bytes
C   vp  second source pointer; read the same way as up
C   n   limb count; the entry code splits it into n mod 4 (dispatch) and
C       n / 4 (basis of the loop count)
34define(`rp', `r32')
35define(`up', `r33')
36define(`vp', `r34')
37define(`n', `r35')
38
C OPERATION SELECTION
C Each ifdef below fires for one OPERATION_xxx symbol (presumably supplied by
C the build, one per generated object -- confirm against the makefiles) and
C defines three macros:
C   func      the exported function name
C   logop     two-source instruction, invoked as logop(dst, u_limb, v_limb)
C   notormov  final step, invoked as notormov(dst, src); it is either a plain
C             mov or "sub dst = -1, src", i.e. -1 - src, which is the bitwise
C             complement of src
C
C Resulting value per limb (u from up, v from vp; andcm a, b is a AND NOT b):
C   and_n    and        + mov         u AND v
C   andn_n   andcm u,v  + mov         u AND NOT v
C   nand_n   and        + complement  NOT (u AND v)
C   ior_n    or         + mov         u OR v
C   iorn_n   andcm v,u  + complement  NOT (v AND NOT u)  =  u OR NOT v
C   nior_n   or         + complement  NOT (u OR v)
C   xor_n    xor        + mov         u XOR v
C   xnor_n   xor        + complement  NOT (u XOR v)
C Note the swapped source order in the iorn_n logop; it is intentional.
39ifdef(`OPERATION_and_n',
40`	define(`func',`mpn_and_n')
41	define(`logop',		`and	$1 = $2, $3')
42	define(`notormov',	`mov	$1 = $2')')
43ifdef(`OPERATION_andn_n',
44`	define(`func',`mpn_andn_n')
45	define(`logop',		`andcm	$1 = $2, $3')
46	define(`notormov',	`mov	$1 = $2')')
47ifdef(`OPERATION_nand_n',
48`	define(`func',`mpn_nand_n')
49	define(`logop',		`and	$1 = $2, $3')
50	define(`notormov',	`sub	$1 = -1, $2')')
51ifdef(`OPERATION_ior_n',
52`	define(`func',`mpn_ior_n')
53	define(`logop',		`or	$1 = $2, $3')
54	define(`notormov',	`mov	$1 = $2')')
55ifdef(`OPERATION_iorn_n',
56`	define(`func',`mpn_iorn_n')
57	define(`logop',		`andcm	$1 = $3, $2')
58	define(`notormov',	`sub	$1 = -1, $2')')
59ifdef(`OPERATION_nior_n',
60`	define(`func',`mpn_nior_n')
61	define(`logop',		`or	$1 = $2, $3')
62	define(`notormov',	`sub	$1 = -1, $2')')
63ifdef(`OPERATION_xor_n',
64`	define(`func',`mpn_xor_n')
65	define(`logop',		`xor	$1 = $2, $3')
66	define(`notormov',	`mov	$1 = $2')')
67ifdef(`OPERATION_xnor_n',
68`	define(`func',`mpn_xnor_n')
69	define(`logop',		`xor	$1 = $2, $3')
70	define(`notormov',	`sub	$1 = -1, $2')')
71
72MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
73
74ASM_START()
C func (rp, up, vp, n)
C
C For each of the n limbs: load one 8-byte limb from up and one from vp,
C combine them with logop, pass the result through notormov, and store it
C to rp.  All three pointers advance by 8 bytes per limb.
C
C Structure: entry code dispatches on n mod 4 into one of four feed-in
C paths (.Lb00, .Lb01, .Lb10, .Lb11).  Each path either finishes a short
C operand through the wind-down ladder (.Lcj6 ... .Lcj1) or primes the
C four-limb main loop and jumps into it at the matching point.
C
C Register use, as visible in this block:
C   r10, r11           first limb pair, loaded before the dispatch
C   r16-r19 / r20-r23  four up limbs / four vp limbs, reloaded in rotation
C   r14, r15           alternating logop results
C   r8, r9             alternating notormov results; these are what st8 writes
C   r2                 entry value of ar.lc, restored before the return
C   p15                4 < n (p14 receives the complement and is not read here)
C   p6, p7, p8         n mod 4 equal to 1, 2, 3
C
C The logop that reads a limb register pair always precedes, in program
C order, the ld8 that refills that pair, and each group delimited by ;;
C consumes only logop/notormov results written in an earlier group.  Keep
C the statement order and the stops exactly as they are.
C
C The right-hand C columns (M, I, I0, B) are annotations already present in
C the file; presumably the execution unit types an instruction may issue to.
C
C NOTE(review): no n = 0 test is visible.  The first limb pair is loaded
C unconditionally and the n mod 4 = 0 path always stores at least four
C limbs, so callers presumably must pass n >= 1 -- confirm.
75PROLOGUE(func)
76	.prologue
77	.save	ar.lc, r2
78	.body
C With a 32-bit ABI the three pointer arguments are widened with addp4 and
C n is zero-extended from its low 32 bits before anything else uses them.
79ifdef(`HAVE_ABI_32',
80`	addp4	rp = 0, rp			C			M I
81	addp4	up = 0, up			C			M I
82	addp4	vp = 0, vp			C			M I
83	zxt4	n = n				C			I
84	;;
85')
C Load limb 0 of both sources and save ar.lc, which the loop paths overwrite.
86{.mmi
87	ld8		r10 = [up], 8		C			M
88	ld8		r11 = [vp], 8		C			M
89	mov.i		r2 = ar.lc		C			I0
90}
C r14 = n mod 4, p15 = (4 < n), n = n / 4 (unsigned shift).
91{.mmi
92	and		r14 = 3, n		C			M I
93	cmp.lt		p15, p14 = 4, n		C			M I
94	shr.u		n = n, 2		C			I0
95	;;
96}
C Dispatch on n mod 4; the value 0 falls through to .Lb00.
97{.mmi
98	cmp.eq		p6, p0 = 1, r14		C			M I
99	cmp.eq		p7, p0 = 2, r14		C			M I
100	cmp.eq		p8, p0 = 3, r14		C			M I
101}
102{.bbb
103   (p6)	br.dptk		.Lb01			C			B
104   (p7)	br.dptk		.Lb10			C			B
105   (p8)	br.dptk		.Lb11			C			B
106}
107
C n mod 4 = 0.  Load limbs 1-3; n becomes n/4 - 2, the loop count used by
C .grt4.  Without p15 the four loaded limbs are finished via .Lcj4.
108.Lb00:	ld8		r17 = [up], 8		C			M
109	ld8		r21 = [vp], 8		C			M
110	add		n = -2, n		C			M I
111	;;
112	ld8		r18 = [up], 8		C			M
113	ld8		r22 = [vp], 8		C			M
114	;;
115	ld8		r19 = [up], 8		C			M
116	ld8		r23 = [vp], 8		C			M
117  (p15)	br.cond.dpnt	.grt4			C			B
118
119	logop(		r14, r10, r11)		C			M I
120	;;
121	logop(		r15, r17, r21)		C			M I
122	notormov(	r8, r14)		C			M I
123	br		.Lcj4			C			B
124
C More than four limbs: start limbs 0 and 1, load limbs 4 and 5, set the
C loop counter and enter the loop at .LL00.
125.grt4:	logop(		r14, r10, r11)		C			M I
126	ld8		r16 = [up], 8		C			M
127	ld8		r20 = [vp], 8		C			M
128	;;
129	logop(		r15, r17, r21)		C			M I
130	ld8		r17 = [up], 8		C			M
131	mov.i		ar.lc = n		C			I0
132	notormov(	r8, r14)		C			M I
133	ld8		r21 = [vp], 8		C			M
134	br		.LL00			C			B
135
C n mod 4 = 1.  n becomes n/4 - 1.  Without p15 only limb 0 exists: finish
C it and store it via .Lcj1.
136.Lb01:	add		n = -1, n		C			M I
137	logop(		r15, r10, r11)		C			M I
138  (p15)	br.cond.dpnt	.grt1			C			B
139	;;
140
141	notormov(	r9, r15)		C			M I
142	br		.Lcj1			C			B
143
C More than four limbs: load limbs 1-4 and set the loop counter.  br.cloop
C goes to .grt5 while the counter is non-zero; otherwise the five limbs in
C flight are drained via .Lcj5.
144.grt1:	ld8		r16 = [up], 8		C			M
145	ld8		r20 = [vp], 8		C			M
146	;;
147	ld8		r17 = [up], 8		C			M
148	ld8		r21 = [vp], 8		C			M
149	mov.i		ar.lc = n		C			I0
150	;;
151	ld8		r18 = [up], 8		C			M
152	ld8		r22 = [vp], 8		C			M
153	;;
154	ld8		r19 = [up], 8		C			M
155	ld8		r23 = [vp], 8		C			M
156	br.cloop.dptk	.grt5			C			B
157	;;
158
159	logop(		r14, r16, r20)		C			M I
160	notormov(	r9, r15)		C			M I
161	br		.Lcj5			C			B
162
C Start limb 1, refill r16/r20 with limb 5, finish limb 0, enter at .LL01.
163.grt5:	logop(		r14, r16, r20)		C			M I
164	ld8		r16 = [up], 8		C			M
165	notormov(	r9, r15)		C			M I
166	ld8		r20 = [vp], 8		C			M
167	br		.LL01			C			B
168
C n mod 4 = 2.  Load limb 1.  Without p15 the two limbs are finished via
C .Lcj2.
169.Lb10:	ld8		r19 = [up], 8		C			M
170	ld8		r23 = [vp], 8		C			M
171  (p15)	br.cond.dpnt	.grt2			C			B
172
173	logop(		r14, r10, r11)		C			M I
174	;;
175	logop(		r15, r19, r23)		C			M I
176	notormov(	r8, r14)		C			M I
177	br		.Lcj2			C			B
178
C More than four limbs: load limbs 2-4, n becomes n/4 - 1 and is copied to
C ar.lc, limbs 0 and 1 are started and r19/r23 refilled with limb 5.
C br.cloop enters the loop at its top while the counter is non-zero;
C otherwise the six limbs in flight are drained via .Lcj6.
179.grt2:	ld8		r16 = [up], 8		C			M
180	ld8		r20 = [vp], 8		C			M
181	add		n = -1, n		C			M I
182	;;
183	ld8		r17 = [up], 8		C			M
184	ld8		r21 = [vp], 8		C			M
185	logop(		r14, r10, r11)		C			M I
186	;;
187	ld8		r18 = [up], 8		C			M
188	ld8		r22 = [vp], 8		C			M
189	mov.i		ar.lc = n		C			I0
190	;;
191	logop(		r15, r19, r23)		C			M I
192	ld8		r19 = [up], 8		C			M
193	notormov(	r8, r14)		C			M I
194	ld8		r23 = [vp], 8		C			M
195	br.cloop.dptk	.Loop			C			B
196	br		.Lcj6			C			B
197
C n mod 4 = 3.  Load limbs 1 and 2; n becomes n/4 - 1.  Without p15 the
C three limbs are finished via .Lcj3.
198.Lb11:	ld8		r18 = [up], 8		C			M
199	ld8		r22 = [vp], 8		C			M
200	add		n = -1, n		C			M I
201	;;
202	ld8		r19 = [up], 8		C			M
203	ld8		r23 = [vp], 8		C			M
204	logop(		r15, r10, r11)		C			M I
205  (p15)	br.cond.dpnt	.grt3			C			B
206	;;
207
208	logop(		r14, r18, r22)		C			M I
209	notormov(	r9, r15)		C			M I
210	br		.Lcj3			C			B
211
C More than four limbs: load limbs 3 and 4, set the loop counter, start
C limb 1, refill r18/r22 with limb 5, finish limb 0, enter at .LL11.
212.grt3:	ld8		r16 = [up], 8		C			M
213	ld8		r20 = [vp], 8		C			M
214	;;
215	ld8		r17 = [up], 8		C			M
216	ld8		r21 = [vp], 8		C			M
217	mov.i		ar.lc = n		C			I0
218	;;
219	logop(		r14, r18, r22)		C			M I
220	ld8		r18 = [up], 8		C			M
221	notormov(	r9, r15)		C			M I
222	ld8		r22 = [vp], 8		C			M
223	br		.LL11			C			B
224
225C *** MAIN LOOP START ***
C Four limbs per iteration, one per ;; group.  Each group stores the limb
C finished in the previous group, applies notormov to the previous logop
C result, applies logop to one loaded pair, and refills that pair with the
C limb four positions further on.  .LL01, .LL00 and .LL11 are the mid-loop
C entry points used by the feed-in paths; the loop repeats while ar.lc is
C non-zero (br.cloop).
226	ALIGN(32)
227.Loop:	st8		[rp] = r8, 8		C			M
228	logop(		r14, r16, r20)		C			M I
229	notormov(	r9, r15)		C			M I
230	ld8		r16 = [up], 8		C			M
231	ld8		r20 = [vp], 8		C			M
232	nop.b		0
233	;;
234.LL01:	st8		[rp] = r9, 8		C			M
235	logop(		r15, r17, r21)		C			M I
236	notormov(	r8, r14)		C			M I
237	ld8		r17 = [up], 8		C			M
238	ld8		r21 = [vp], 8		C			M
239	nop.b		0
240	;;
241.LL00:	st8		[rp] = r8, 8		C			M
242	logop(		r14, r18, r22)		C			M I
243	notormov(	r9, r15)		C			M I
244	ld8		r18 = [up], 8		C			M
245	ld8		r22 = [vp], 8		C			M
246	nop.b		0
247	;;
248.LL11:	st8		[rp] = r9, 8		C			M
249	logop(		r15, r19, r23)		C			M I
250	notormov(	r8, r14)		C			M I
251	ld8		r19 = [up], 8		C			M
252	ld8		r23 = [vp], 8		C			M
253	br.cloop.dptk	.Loop	;;		C			B
254C *** MAIN LOOP END ***
255
C Wind-down ladder: the same stages as the loop body but with no further
C loads.  Every rung performs exactly one store, so entering at .LcjK
C stores the last K limbs.  The final rung also restores ar.lc from r2 and
C returns through b0.
256.Lcj6:	st8		[rp] = r8, 8		C			M
257	logop(		r14, r16, r20)		C			M I
258	notormov(	r9, r15)		C			M I
259	;;
260.Lcj5:	st8		[rp] = r9, 8		C			M
261	logop(		r15, r17, r21)		C			M I
262	notormov(	r8, r14)		C			M I
263	;;
264.Lcj4:	st8		[rp] = r8, 8		C			M
265	logop(		r14, r18, r22)		C			M I
266	notormov(	r9, r15)		C			M I
267	;;
268.Lcj3:	st8		[rp] = r9, 8		C			M
269	logop(		r15, r19, r23)		C			M I
270	notormov(	r8, r14)		C			M I
271	;;
272.Lcj2:	st8		[rp] = r8, 8		C			M
273	notormov(	r9, r15)		C			M I
274	;;
275.Lcj1:	st8		[rp] = r9, 8		C			M
276	mov.i		ar.lc = r2		C			I0
277	br.ret.sptk.many b0			C			B
278EPILOGUE()
279ASM_END()
280