xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/ia64/copyd.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.
2
3dnl  Contributed to the GNU project by Torbjorn Granlund.
4
5dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33include(`../config.m4')
34
35C         cycles/limb
36C Itanium:    1
37C Itanium 2:  0.5
38
39C INPUT PARAMETERS
40C rp = r32
41C sp = r33
42C n = r34
43
44ASM_START()
C mpn_copyd: copy the n limbs at sp to rp, starting with the limb at the
C highest address and working down.  A limb is 8 bytes (ld8/st8).
C
C Register use in this routine:
C   r32, r33   destination / source pointers of the first copy stream
C   r20, r21   destination / source pointers of the second copy stream
C   r16..r19   limbs in flight; r16 and r18 travel through r32/r33,
C              r17 and r19 through r20/r21
C   r34        n on entry, then n - 4
C   r14        n mod 4, selects the entry sequence .Lb00/.Lb01/.Lb10/.Lb11
C   r15        n - 4 shifted right by 2, written to ar.lc as the loop count
C   r2         value of ar.lc on entry, written back before returning
C   p14        set when n <= 3; the entry sequences then branch directly to
C              the tail labels .Ls00/.Ls01/.Ls10/.Ls11 and skip the loop
C
C Every load and store post-decrements its pointer by 16 and the two streams
C are 8 bytes apart, so together they visit each limb once.
45PROLOGUE(mpn_copyd)
46	.prologue
47	.save ar.lc, r2
48	.body
C With the 32-bit ABI both pointers go through addp4 and n through sxt4
C before anything else uses them.
49ifdef(`HAVE_ABI_32',
50`	addp4		r32 = 0, r32
51	addp4		r33 = 0, r33
52	sxt4		r34 = r34
53	;;
54')
55{.mmi
56	shladd		r32 = r34, 3, r32	C r32 = rp + 8*n, just past the last destination limb
57	shladd		r33 = r34, 3, r33	C r33 = sp + 8*n, just past the last source limb
58	mov.i		r2 = ar.lc		C keep the incoming ar.lc so it can be put back
59}
60{.mmi
61	and		r14 = 3, r34		C r14 = n mod 4
62	cmp.ge		p14, p15 = 3, r34	C p14 = (3 >= n), p15 = the opposite
63	add		r34 = -4, r34		C r34 = n - 4
64	;;
65}
66{.mmi
67	cmp.eq		p8, p0 = 1, r14		C p8:  n mod 4 = 1
68	cmp.eq		p10, p0 = 2, r14	C p10: n mod 4 = 2
69	cmp.eq		p12, p0 = 3, r14	C p12: n mod 4 = 3
70}
71{.bbb
72  (p8)	br.dptk		.Lb01
73  (p10)	br.dptk		.Lb10
74  (p12)	br.dptk		.Lb11
75}
76
C Fall through to .Lb00 when none of p8, p10, p12 is set, i.e. n mod 4 = 0.
77.Lb00:	C  n = 0, 4, 8, 12, ...
78	add		r32 = -8, r32		C r32 = address of destination limb n-1
79	add		r33 = -8, r33		C r33 = address of source limb n-1
80  (p14)	br.dptk		.Ls00		C n <= 3 on this path: return without any load or store
81	;;
82	add		r21 = -8, r33		C r21 = address of source limb n-2
83	ld8		r16 = [r33], -16	C r16 = limb n-1
84	shr		r15 = r34, 2		C r15 = (n - 4) >> 2
85	;;
86	ld8		r17 = [r21], -16	C r17 = limb n-2
87	mov.i		ar.lc = r15
88	ld8		r18 = [r33], -16	C r18 = limb n-3
89	add		r20 = -8, r32		C r20 = address of destination limb n-2
90	;;
91	ld8		r19 = [r21], -16	C r19 = limb n-4
92	br.cloop.dptk	.Loop			C enter the loop unless ar.lc is 0
93	;;
94	br.sptk		.Lend			C ar.lc was 0: only the four loaded limbs remain
95	;;
96
97.Lb01:	C  n = 1, 5, 9, 13, ...
98	add		r21 = -8, r33		C r21 = address of source limb n-1
99	add		r20 = -8, r32		C r20 = address of destination limb n-1
100	add		r33 = -16, r33		C r33 = address of source limb n-2
101	add		r32 = -16, r32		C r32 = address of destination limb n-2
102	;;
103	ld8		r19 = [r21], -16	C r19 = limb n-1
104	shr		r15 = r34, 2		C r15 = (n - 4) >> 2
105  (p14)	br.dptk		.Ls01		C n <= 3: r19 is the only limb, store it and return
106	;;
107	ld8		r16 = [r33], -16	C r16 = limb n-2
108	mov.i		ar.lc = r15
109	;;
110	ld8		r17 = [r21], -16	C r17 = limb n-3
111	ld8		r18 = [r33], -16	C r18 = limb n-4
112	br.sptk		.Li01			C join the loop at the store of r19
113	;;
114
115.Lb10:	C  n = 2, 6, 10, 14, ...
116	add		r21 = -16, r33		C r21 = address of source limb n-2
117	shr		r15 = r34, 2		C r15 = (n - 4) >> 2
118	add		r20 = -16, r32		C r20 = address of destination limb n-2
119	add		r32 = -8, r32		C r32 = address of destination limb n-1
120	add		r33 = -8, r33		C r33 = address of source limb n-1
121	;;
122	ld8		r18 = [r33], -16	C r18 = limb n-1
123	ld8		r19 = [r21], -16	C r19 = limb n-2
124	mov.i		ar.lc = r15
125  (p14)	br.dptk		.Ls10		C n <= 3: store r18 and r19, then return
126	;;
127	ld8		r16 = [r33], -16	C r16 = limb n-3
128	ld8		r17 = [r21], -16	C r17 = limb n-4
129	br.sptk		.Li10			C join the loop at the store of r18
130	;;
131
132.Lb11:	C  n = 3, 7, 11, 15, ...
133	add		r21 = -8, r33		C r21 = address of source limb n-1
134	add		r20 = -8, r32		C r20 = address of destination limb n-1
135	add		r33 = -16, r33		C r33 = address of source limb n-2
136	add		r32 = -16, r32		C r32 = address of destination limb n-2
137	;;
138	ld8		r17 = [r21], -16	C r17 = limb n-1
139	shr		r15 = r34, 2		C r15 = (n - 4) >> 2
140	;;
141	ld8		r18 = [r33], -16	C r18 = limb n-2
142	mov.i		ar.lc = r15
143	ld8		r19 = [r21], -16	C r19 = limb n-3
144  (p14)	br.dptk		.Ls11		C n <= 3: store r17, r18 and r19, then return
145	;;
146	ld8		r16 = [r33], -16	C r16 = limb n-4
147	br.sptk		.Li11			C join the loop at the store of r17
148	;;
149
C Main loop.  One trip stores the four limbs held in r16..r19 and loads the
C next four into the same registers.  .Li11, .Li10 and .Li01 are the points
C at which the entry sequences above join the loop.
150	ALIGN(32)
151.Loop:
152.Li00:
153{.mmb
154	st8		[r32] = r16, -16	C first stream: store r16, then refill it
155	ld8		r16 = [r33], -16
156	nop.b		0
157}
158.Li11:
159{.mmb
160	st8		[r20] = r17, -16	C second stream: store r17, then refill it
161	ld8		r17 = [r21], -16
162	nop.b		0
163	;;
164}
165.Li10:
166{.mmb
167	st8		[r32] = r18, -16	C first stream: store r18, then refill it
168	ld8		r18 = [r33], -16
169	nop.b		0
170}
171.Li01:
172{.mmb
173	st8		[r20] = r19, -16	C second stream: store r19, then refill it
174	ld8		r19 = [r21], -16
175	br.cloop.dptk	.Loop			C go round again while ar.lc is not 0
176	;;
177}
C Tail.  Store the limbs still held in registers; entering at a later label
C stores fewer of them.  .Ls00 stores nothing.
178.Lend:	st8		[r32] = r16, -16
179.Ls11:	st8		[r20] = r17, -16
180	;;
181.Ls10:	st8		[r32] = r18, -16
182.Ls01:	st8		[r20] = r19, -16
183.Ls00:	mov.i		ar.lc = r2		C put back the ar.lc value saved on entry
184	br.ret.sptk.many b0
185EPILOGUE()
186ASM_END()
187